• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2021 The Khronos Group Inc.
6  * Copyright (c) 2021 Valve Corporation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Mesh Shader Misc Tests for VK_EXT_mesh_shader
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktMeshShaderMiscTests.hpp"
26 #include "vktMeshShaderUtil.hpp"
27 #include "vktTestCase.hpp"
28 
29 #include "vkBuilderUtil.hpp"
30 #include "vkImageWithMemory.hpp"
31 #include "vkBufferWithMemory.hpp"
32 #include "vkObjUtil.hpp"
33 #include "vkTypeUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkImageUtil.hpp"
36 #include "vkBarrierUtil.hpp"
37 
38 #include "tcuDefs.hpp"
39 #include "tcuVectorType.hpp"
40 #include "tcuImageCompare.hpp"
41 #include "tcuTexture.hpp"
42 #include "tcuTextureUtil.hpp"
43 #include "tcuMaybe.hpp"
44 #include "tcuStringTemplate.hpp"
45 #include "tcuTestLog.hpp"
46 
47 #include "deRandom.hpp"
48 
49 #include <cstdint>
50 #include <memory>
51 #include <utility>
52 #include <vector>
53 #include <string>
54 #include <sstream>
55 #include <map>
56 #include <type_traits>
57 #include <limits>
58 
59 namespace vkt
60 {
61 namespace MeshShader
62 {
63 
64 namespace
65 {
66 
67 using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
68 
69 using namespace vk;
70 
71 // Output images will use this format.
getOutputFormat()72 VkFormat getOutputFormat ()
73 {
74 	return VK_FORMAT_R8G8B8A8_UNORM;
75 }
76 
// Comparison threshold that is reasonable for the format returned by getOutputFormat().
float getCompareThreshold ()
{
	// The value sits between 1/256 and 2/256.
	const float threshold = 0.005f;
	return threshold;
}
82 
83 // Check mesh shader support.
genericCheckSupport(Context & context,bool requireTaskShader,bool requireVertexStores)84 void genericCheckSupport (Context& context, bool requireTaskShader, bool requireVertexStores)
85 {
86 	checkTaskMeshShaderSupportEXT(context, requireTaskShader, true);
87 
88 	if (requireVertexStores)
89 	{
90 		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
91 	}
92 }
93 
94 struct MiscTestParams
95 {
96 	tcu::Maybe<tcu::UVec3>	taskCount;
97 	tcu::UVec3				meshCount;
98 
99 	uint32_t				width;
100 	uint32_t				height;
101 
MiscTestParamsvkt::MeshShader::__anonb99b7fd80111::MiscTestParams102 	MiscTestParams (const tcu::Maybe<tcu::UVec3>& taskCount_, const tcu::UVec3& meshCount_, uint32_t width_, uint32_t height_)
103 		: taskCount		(taskCount_)
104 		, meshCount		(meshCount_)
105 		, width			(width_)
106 		, height		(height_)
107 	{}
108 
109 	// Makes the class polymorphic and allows the right destructor to be used for subclasses.
~MiscTestParamsvkt::MeshShader::__anonb99b7fd80111::MiscTestParams110 	virtual ~MiscTestParams () {}
111 
needsTaskShadervkt::MeshShader::__anonb99b7fd80111::MiscTestParams112 	bool needsTaskShader () const
113 	{
114 		return static_cast<bool>(taskCount);
115 	}
116 
drawCountvkt::MeshShader::__anonb99b7fd80111::MiscTestParams117 	tcu::UVec3 drawCount () const
118 	{
119 		if (needsTaskShader())
120 			return taskCount.get();
121 		return meshCount;
122 	}
123 };
124 
125 using ParamsPtr = std::unique_ptr<MiscTestParams>;
126 
127 class MeshShaderMiscCase : public vkt::TestCase
128 {
129 public:
130 					MeshShaderMiscCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params);
~MeshShaderMiscCase(void)131 	virtual			~MeshShaderMiscCase		(void) {}
132 
133 	void			checkSupport			(Context& context) const override;
134 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
135 
136 protected:
137 	std::unique_ptr<MiscTestParams> m_params;
138 };
139 
MeshShaderMiscCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)140 MeshShaderMiscCase::MeshShaderMiscCase (tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
141 	: vkt::TestCase	(testCtx, name, description)
142 	, m_params		(params.release())
143 {}
144 
checkSupport(Context & context) const145 void MeshShaderMiscCase::checkSupport (Context& context) const
146 {
147 	genericCheckSupport(context, m_params->needsTaskShader(), /*requireVertexStores*/false);
148 }
149 
150 // Adds the generic fragment shader. To be called by subclasses.
initPrograms(vk::SourceCollections & programCollection) const151 void MeshShaderMiscCase::initPrograms (vk::SourceCollections& programCollection) const
152 {
153 	const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
154 
155 	std::string frag =
156 		"#version 450\n"
157 		"#extension GL_EXT_mesh_shader : enable\n"
158 		"\n"
159 		"layout (location=0) in perprimitiveEXT vec4 primitiveColor;\n"
160 		"layout (location=0) out vec4 outColor;\n"
161 		"\n"
162 		"void main ()\n"
163 		"{\n"
164 		"    outColor = primitiveColor;\n"
165 		"}\n"
166 		;
167 	programCollection.glslSources.add("frag") << glu::FragmentSource(frag) << buildOptions;
168 }
169 
170 class MeshShaderMiscInstance : public vkt::TestInstance
171 {
172 public:
MeshShaderMiscInstance(Context & context,const MiscTestParams * params)173 					MeshShaderMiscInstance	(Context& context, const MiscTestParams* params)
174 						: vkt::TestInstance	(context)
175 						, m_params			(params)
176 						, m_referenceLevel	()
177 					{
178 					}
179 
180 	void			generateSolidRefLevel	(const tcu::Vec4& color, std::unique_ptr<tcu::TextureLevel>& output);
181 	virtual void	generateReferenceLevel	() = 0;
182 
183 	virtual bool	verifyResult			(const tcu::ConstPixelBufferAccess& resultAccess, const tcu::TextureLevel& referenceLevel) const;
184 	virtual bool	verifyResult			(const tcu::ConstPixelBufferAccess& resultAccess) const;
185 	tcu::TestStatus	iterate					() override;
186 
187 protected:
188 	const MiscTestParams*				m_params;
189 	std::unique_ptr<tcu::TextureLevel>	m_referenceLevel;
190 };
191 
generateSolidRefLevel(const tcu::Vec4 & color,std::unique_ptr<tcu::TextureLevel> & output)192 void MeshShaderMiscInstance::generateSolidRefLevel (const tcu::Vec4& color, std::unique_ptr<tcu::TextureLevel>& output)
193 {
194 	const auto format		= getOutputFormat();
195 	const auto tcuFormat	= mapVkFormat(format);
196 
197 	const auto iWidth		= static_cast<int>(m_params->width);
198 	const auto iHeight		= static_cast<int>(m_params->height);
199 
200 	output.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
201 
202 	const auto access		= output->getAccess();
203 
204 	// Fill with solid color.
205 	tcu::clear(access, color);
206 }
207 
verifyResult(const tcu::ConstPixelBufferAccess & resultAccess) const208 bool MeshShaderMiscInstance::verifyResult (const tcu::ConstPixelBufferAccess& resultAccess) const
209 {
210 	return verifyResult(resultAccess, *m_referenceLevel);
211 }
212 
verifyResult(const tcu::ConstPixelBufferAccess & resultAccess,const tcu::TextureLevel & referenceLevel) const213 bool MeshShaderMiscInstance::verifyResult (const tcu::ConstPixelBufferAccess& resultAccess, const tcu::TextureLevel& referenceLevel) const
214 {
215 	const auto referenceAccess = referenceLevel.getAccess();
216 
217 	const auto refWidth		= referenceAccess.getWidth();
218 	const auto refHeight	= referenceAccess.getHeight();
219 	const auto refDepth		= referenceAccess.getDepth();
220 
221 	const auto resWidth		= resultAccess.getWidth();
222 	const auto resHeight	= resultAccess.getHeight();
223 	const auto resDepth		= resultAccess.getDepth();
224 
225 	DE_ASSERT(resWidth == refWidth || resHeight == refHeight || resDepth == refDepth);
226 
227 	// For release builds.
228 	DE_UNREF(refWidth);
229 	DE_UNREF(refHeight);
230 	DE_UNREF(refDepth);
231 	DE_UNREF(resWidth);
232 	DE_UNREF(resHeight);
233 	DE_UNREF(resDepth);
234 
235 	const auto outputFormat		= getOutputFormat();
236 	const auto expectedFormat	= mapVkFormat(outputFormat);
237 	const auto resFormat		= resultAccess.getFormat();
238 	const auto refFormat		= referenceAccess.getFormat();
239 
240 	DE_ASSERT(resFormat == expectedFormat && refFormat == expectedFormat);
241 
242 	// For release builds
243 	DE_UNREF(expectedFormat);
244 	DE_UNREF(resFormat);
245 	DE_UNREF(refFormat);
246 
247 	auto&			log				= m_context.getTestContext().getLog();
248 	const auto		threshold		= getCompareThreshold();
249 	const tcu::Vec4	thresholdVec	(threshold, threshold, threshold, threshold);
250 
251 	return tcu::floatThresholdCompare(log, "Result", "", referenceAccess, resultAccess, thresholdVec, tcu::COMPARE_LOG_ON_ERROR);
252 }
253 
iterate()254 tcu::TestStatus MeshShaderMiscInstance::iterate ()
255 {
256 	const auto&		vkd			= m_context.getDeviceInterface();
257 	const auto		device		= m_context.getDevice();
258 	auto&			alloc		= m_context.getDefaultAllocator();
259 	const auto		queueIndex	= m_context.getUniversalQueueFamilyIndex();
260 	const auto		queue		= m_context.getUniversalQueue();
261 
262 	const auto		imageFormat	= getOutputFormat();
263 	const auto		tcuFormat	= mapVkFormat(imageFormat);
264 	const auto		imageExtent	= makeExtent3D(m_params->width, m_params->height, 1u);
265 	const auto		imageUsage	= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
266 
267 	const VkImageCreateInfo colorBufferInfo =
268 	{
269 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
270 		nullptr,								//	const void*				pNext;
271 		0u,										//	VkImageCreateFlags		flags;
272 		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
273 		imageFormat,							//	VkFormat				format;
274 		imageExtent,							//	VkExtent3D				extent;
275 		1u,										//	uint32_t				mipLevels;
276 		1u,										//	uint32_t				arrayLayers;
277 		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
278 		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
279 		imageUsage,								//	VkImageUsageFlags		usage;
280 		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
281 		0u,										//	uint32_t				queueFamilyIndexCount;
282 		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
283 		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
284 	};
285 
286 	// Create color image and view.
287 	ImageWithMemory	colorImage	(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
288 	const auto		colorSRR	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
289 	const auto		colorSRL	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
290 	const auto		colorView	= makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
291 
292 	// Create a memory buffer for verification.
293 	const auto			verificationBufferSize	= static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
294 	const auto			verificationBufferUsage	= (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
295 	const auto			verificationBufferInfo	= makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
296 
297 	BufferWithMemory	verificationBuffer		(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
298 	auto&				verificationBufferAlloc	= verificationBuffer.getAllocation();
299 	void*				verificationBufferData	= verificationBufferAlloc.getHostPtr();
300 
301 	// Pipeline layout.
302 	const auto pipelineLayout = makePipelineLayout(vkd, device);
303 
304 	// Shader modules.
305 	const auto&	binaries	= m_context.getBinaryCollection();
306 	const auto	hasTask		= binaries.contains("task");
307 
308 	const auto	meshShader	= createShaderModule(vkd, device, binaries.get("mesh"));
309 	const auto	fragShader	= createShaderModule(vkd, device, binaries.get("frag"));
310 
311 	Move<VkShaderModule> taskShader;
312 	if (hasTask)
313 		taskShader = createShaderModule(vkd, device, binaries.get("task"));
314 
315 	// Render pass.
316 	const auto renderPass = makeRenderPass(vkd, device, imageFormat);
317 
318 	// Framebuffer.
319 	const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);
320 
321 	// Viewport and scissor.
322 	const std::vector<VkViewport>	viewports	(1u, makeViewport(imageExtent));
323 	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(imageExtent));
324 
325 	// Color blending.
326 	const auto									colorWriteMask	= (VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT);
327 	const VkPipelineColorBlendAttachmentState	blendAttState	=
328 	{
329 		VK_TRUE,				//	VkBool32				blendEnable;
330 		VK_BLEND_FACTOR_ONE,	//	VkBlendFactor			srcColorBlendFactor;
331 		VK_BLEND_FACTOR_ONE,	//	VkBlendFactor			dstColorBlendFactor;
332 		VK_BLEND_OP_ADD,		//	VkBlendOp				colorBlendOp;
333 		VK_BLEND_FACTOR_ONE,	//	VkBlendFactor			srcAlphaBlendFactor;
334 		VK_BLEND_FACTOR_ONE,	//	VkBlendFactor			dstAlphaBlendFactor;
335 		VK_BLEND_OP_ADD,		//	VkBlendOp				alphaBlendOp;
336 		colorWriteMask,			//	VkColorComponentFlags	colorWriteMask;
337 	};
338 
339 	const VkPipelineColorBlendStateCreateInfo colorBlendInfo =
340 	{
341 		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,	//	VkStructureType								sType;
342 		nullptr,													//	const void*									pNext;
343 		0u,															//	VkPipelineColorBlendStateCreateFlags		flags;
344 		VK_FALSE,													//	VkBool32									logicOpEnable;
345 		VK_LOGIC_OP_OR,												//	VkLogicOp									logicOp;
346 		1u,															//	uint32_t									attachmentCount;
347 		&blendAttState,												//	const VkPipelineColorBlendAttachmentState*	pAttachments;
348 		{ 0.0f, 0.0f, 0.0f, 0.0f },									//	float										blendConstants[4];
349 	};
350 
351 	const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
352 		taskShader.get(), meshShader.get(), fragShader.get(),
353 		renderPass.get(), viewports, scissors, 0u/*subpass*/,
354 		nullptr, nullptr, nullptr, &colorBlendInfo);
355 
356 	// Command pool and buffer.
357 	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
358 	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
359 	const auto cmdBuffer	= cmdBufferPtr.get();
360 
361 	beginCommandBuffer(vkd, cmdBuffer);
362 
363 	// Run pipeline.
364 	const tcu::Vec4	clearColor	(0.0f, 0.0f, 0.0f, 0.0f);
365 	const auto		drawCount	= m_params->drawCount();
366 	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
367 	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
368 	vkd.cmdDrawMeshTasksEXT(cmdBuffer, drawCount.x(), drawCount.y(), drawCount.z());
369 	endRenderPass(vkd, cmdBuffer);
370 
371 	// Copy color buffer to verification buffer.
372 	const auto colorAccess		= (VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT);
373 	const auto transferRead		= VK_ACCESS_TRANSFER_READ_BIT;
374 	const auto transferWrite	= VK_ACCESS_TRANSFER_WRITE_BIT;
375 	const auto hostRead			= VK_ACCESS_HOST_READ_BIT;
376 
377 	const auto preCopyBarrier	= makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
378 	const auto postCopyBarrier	= makeMemoryBarrier(transferWrite, hostRead);
379 	const auto copyRegion		= makeBufferImageCopy(imageExtent, colorSRL);
380 
381 	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
382 	vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, verificationBuffer.get(), 1u, &copyRegion);
383 	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postCopyBarrier, 0u, nullptr, 0u, nullptr);
384 
385 	endCommandBuffer(vkd, cmdBuffer);
386 	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
387 
388 	// Generate reference image and compare results.
389 	const tcu::IVec3					iExtent				(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
390 	const tcu::ConstPixelBufferAccess	verificationAccess	(tcuFormat, iExtent, verificationBufferData);
391 
392 	generateReferenceLevel();
393 	invalidateAlloc(vkd, device, verificationBufferAlloc);
394 	if (!verifyResult(verificationAccess))
395 		TCU_FAIL("Result does not match reference; check log for details");
396 
397 	return tcu::TestStatus::pass("Pass");
398 }
399 
400 // Verify passing more complex data between the task and mesh shaders.
401 class ComplexTaskDataCase : public MeshShaderMiscCase
402 {
403 public:
ComplexTaskDataCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)404 					ComplexTaskDataCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
405 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
406 					{}
407 
408 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
409 	TestInstance*	createInstance			(Context& context) const override;
410 };
411 
412 class ComplexTaskDataInstance : public MeshShaderMiscInstance
413 {
414 public:
ComplexTaskDataInstance(Context & context,const MiscTestParams * params)415 	ComplexTaskDataInstance (Context& context, const MiscTestParams* params)
416 		: MeshShaderMiscInstance (context, params)
417 	{}
418 
419 	void	generateReferenceLevel	() override;
420 };
421 
generateReferenceLevel()422 void ComplexTaskDataInstance::generateReferenceLevel ()
423 {
424 	const auto format		= getOutputFormat();
425 	const auto tcuFormat	= mapVkFormat(format);
426 
427 	const auto iWidth		= static_cast<int>(m_params->width);
428 	const auto iHeight		= static_cast<int>(m_params->height);
429 
430 	const auto halfWidth	= iWidth / 2;
431 	const auto halfHeight	= iHeight / 2;
432 
433 	m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
434 
435 	const auto access		= m_referenceLevel->getAccess();
436 
437 	// Each image quadrant gets a different color.
438 	for (int y = 0; y < iHeight; ++y)
439 	for (int x = 0; x < iWidth; ++x)
440 	{
441 		const float	red			= ((y < halfHeight) ? 0.0f : 1.0f);
442 		const float	green		= ((x < halfWidth)  ? 0.0f : 1.0f);
443 		const auto	refColor	= tcu::Vec4(red, green, 1.0f, 1.0f);
444 		access.setPixel(refColor, x, y);
445 	}
446 }
447 
initPrograms(vk::SourceCollections & programCollection) const448 void ComplexTaskDataCase::initPrograms (vk::SourceCollections& programCollection) const
449 {
450 	const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
451 
452 	// Add the generic fragment shader.
453 	MeshShaderMiscCase::initPrograms(programCollection);
454 
455 	const std::string taskDataDecl =
456 		"struct RowId {\n"
457 		"    uint id;\n"
458 		"};\n"
459 		"\n"
460 		"struct WorkGroupData {\n"
461 		"    float WorkGroupIdPlusOnex1000Iota[10];\n"
462 		"    RowId rowId;\n"
463 		"    uvec3 WorkGroupIdPlusOnex2000Iota;\n"
464 		"    vec2  WorkGroupIdPlusOnex3000Iota;\n"
465 		"};\n"
466 		"\n"
467 		"struct ExternalData {\n"
468 		"    float OneMillion;\n"
469 		"    uint  TwoMillion;\n"
470 		"    WorkGroupData workGroupData;\n"
471 		"};\n"
472 		"\n"
473 		"struct TaskData {\n"
474 		"    uint yes;\n"
475 		"    ExternalData externalData;\n"
476 		"};\n"
477 		"taskPayloadSharedEXT TaskData td;\n"
478 		;
479 
480 	{
481 		std::ostringstream task;
482 		task
483 			<< "#version 450\n"
484 			<< "#extension GL_EXT_mesh_shader : enable\n"
485 			<< "\n"
486 			<< "layout (local_size_x=1) in;\n"
487 			<< "\n"
488 			<< taskDataDecl
489 			<< "\n"
490 			<< "void main ()\n"
491 			<< "{\n"
492 			<< "    td.yes = 1u;\n"
493 			<< "    td.externalData.OneMillion = 1000000.0;\n"
494 			<< "    td.externalData.TwoMillion = 2000000u;\n"
495 			<< "    for (uint i = 0; i < 10; i++) {\n"
496 			<< "        td.externalData.workGroupData.WorkGroupIdPlusOnex1000Iota[i] = float((gl_WorkGroupID.x + 1u) * 1000 + i);\n"
497 			<< "    }\n"
498 			<< "    {\n"
499 			<< "        uint baseVal = (gl_WorkGroupID.x + 1u) * 2000;\n"
500 			<< "        td.externalData.workGroupData.WorkGroupIdPlusOnex2000Iota = uvec3(baseVal, baseVal + 1, baseVal + 2);\n"
501 			<< "    }\n"
502 			<< "    {\n"
503 			<< "        uint baseVal = (gl_WorkGroupID.x + 1u) * 3000;\n"
504 			<< "        td.externalData.workGroupData.WorkGroupIdPlusOnex3000Iota = vec2(baseVal, baseVal + 1);\n"
505 			<< "    }\n"
506 			<< "    td.externalData.workGroupData.rowId.id = gl_WorkGroupID.x;\n"
507 			<< "    EmitMeshTasksEXT(2u, 1u, 1u);\n"
508 			<< "}\n"
509 			;
510 		programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
511 	}
512 
513 	{
514 		std::ostringstream mesh;
515 		mesh
516 			<< "#version 450\n"
517 			<< "#extension GL_EXT_mesh_shader : enable\n"
518 			<< "\n"
519 			<< "layout(local_size_x=2) in;\n"
520 			<< "layout(triangles) out;\n"
521 			<< "layout(max_vertices=4, max_primitives=2) out;\n"
522 			<< "\n"
523 			<< "layout (location=0) out perprimitiveEXT vec4 triangleColor[];\n"
524 			<< "\n"
525 			<< taskDataDecl
526 			<< "\n"
527 			<< "void main ()\n"
528 			<< "{\n"
529 			<< "    bool dataOK = true;\n"
530 			<< "    dataOK = (dataOK && (td.yes == 1u));\n"
531 			<< "    dataOK = (dataOK && (td.externalData.OneMillion == 1000000.0 && td.externalData.TwoMillion == 2000000u));\n"
532 			<< "    uint rowId = td.externalData.workGroupData.rowId.id;\n"
533 			<< "    dataOK = (dataOK && (rowId == 0u || rowId == 1u));\n"
534 			<< "\n"
535 			<< "    {\n"
536 			<< "        uint baseVal = (rowId + 1u) * 1000u;\n"
537 			<< "        for (uint i = 0; i < 10; i++) {\n"
538 			<< "            if (td.externalData.workGroupData.WorkGroupIdPlusOnex1000Iota[i] != float(baseVal + i)) {\n"
539 			<< "                dataOK = false;\n"
540 			<< "                break;\n"
541 			<< "            }\n"
542 			<< "        }\n"
543 			<< "    }\n"
544 			<< "\n"
545 			<< "    {\n"
546 			<< "        uint baseVal = (rowId + 1u) * 2000;\n"
547 			<< "        uvec3 expected = uvec3(baseVal, baseVal + 1, baseVal + 2);\n"
548 			<< "        if (td.externalData.workGroupData.WorkGroupIdPlusOnex2000Iota != expected) {\n"
549 			<< "            dataOK = false;\n"
550 			<< "        }\n"
551 			<< "    }\n"
552 			<< "\n"
553 			<< "    {\n"
554 			<< "        uint baseVal = (rowId + 1u) * 3000;\n"
555 			<< "        vec2 expected = vec2(baseVal, baseVal + 1);\n"
556 			<< "        if (td.externalData.workGroupData.WorkGroupIdPlusOnex3000Iota != expected) {\n"
557 			<< "            dataOK = false;\n"
558 			<< "        }\n"
559 			<< "    }\n"
560 			<< "\n"
561 			<< "    uint columnId = gl_WorkGroupID.x;\n"
562 			<< "\n"
563 			<< "    uvec2 vertPrim = uvec2(0u, 0u);\n"
564 			<< "    if (dataOK) {\n"
565 			<< "        vertPrim = uvec2(4u, 2u);\n"
566 			<< "    }\n"
567 			<< "    SetMeshOutputsEXT(vertPrim.x, vertPrim.y);\n"
568 			<< "    if (vertPrim.y == 0u) {\n"
569 			<< "        return;\n"
570 			<< "    }\n"
571 			<< "\n"
572 			<< "    const vec4 outColor = vec4(rowId, columnId, 1.0f, 1.0f);\n"
573 			<< "    triangleColor[0] = outColor;\n"
574 			<< "    triangleColor[1] = outColor;\n"
575 			<< "\n"
576 			<< "    // Each local invocation will generate two points and one triangle from the quad.\n"
577 			<< "    // The first local invocation will generate the top quad vertices.\n"
578 			<< "    // The second invocation will generate the two bottom vertices.\n"
579 			<< "    vec4 left  = vec4(0.0, 0.0, 0.0, 1.0);\n"
580 			<< "    vec4 right = vec4(1.0, 0.0, 0.0, 1.0);\n"
581 			<< "\n"
582 			<< "    float localInvocationOffsetY = float(gl_LocalInvocationIndex);\n"
583 			<< "    left.y  += localInvocationOffsetY;\n"
584 			<< "    right.y += localInvocationOffsetY;\n"
585 			<< "\n"
586 			<< "    // The code above creates a quad from (0, 0) to (1, 1) but we need to offset it\n"
587 			<< "    // in X and/or Y depending on the row and column, to place it in other quadrants.\n"
588 			<< "    float quadrantOffsetX = float(int(columnId) - 1);\n"
589 			<< "    float quadrantOffsetY = float(int(rowId) - 1);\n"
590 			<< "\n"
591 			<< "    left.x  += quadrantOffsetX;\n"
592 			<< "    right.x += quadrantOffsetX;\n"
593 			<< "\n"
594 			<< "    left.y  += quadrantOffsetY;\n"
595 			<< "    right.y += quadrantOffsetY;\n"
596 			<< "\n"
597 			<< "    uint baseVertexId = 2*gl_LocalInvocationIndex;\n"
598 			<< "    gl_MeshVerticesEXT[baseVertexId + 0].gl_Position = left;\n"
599 			<< "    gl_MeshVerticesEXT[baseVertexId + 1].gl_Position = right;\n"
600 			<< "\n"
601 			<< "    // 0,1,2 or 1,2,3 (note: triangles alternate front face this way)\n"
602 			<< "    const uvec3 indices = uvec3(0 + gl_LocalInvocationIndex, 1 + gl_LocalInvocationIndex, 2 + gl_LocalInvocationIndex);\n"
603 			<< "    gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = indices;\n"
604 			<< "}\n"
605 			;
606 		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
607 	}
608 }
609 
createInstance(Context & context) const610 TestInstance* ComplexTaskDataCase::createInstance (Context& context) const
611 {
612 	return new ComplexTaskDataInstance(context, m_params.get());
613 }
614 
615 // Verify drawing a single point.
616 class SinglePointCase : public MeshShaderMiscCase
617 {
618 public:
SinglePointCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)619 					SinglePointCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
620 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
621 					{}
622 
623 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
624 	TestInstance*	createInstance			(Context& context) const override;
625 };
626 
627 class SinglePointInstance : public MeshShaderMiscInstance
628 {
629 public:
SinglePointInstance(Context & context,const MiscTestParams * params)630 	SinglePointInstance (Context& context, const MiscTestParams* params)
631 		: MeshShaderMiscInstance (context, params)
632 	{}
633 
634 	void	generateReferenceLevel	() override;
635 };
636 
createInstance(Context & context) const637 TestInstance* SinglePointCase::createInstance (Context& context) const
638 {
639 	return new SinglePointInstance (context, m_params.get());
640 }
641 
initPrograms(vk::SourceCollections & programCollection) const642 void SinglePointCase::initPrograms (vk::SourceCollections& programCollection) const
643 {
644 	DE_ASSERT(!m_params->needsTaskShader());
645 
646 	const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
647 
648 	MeshShaderMiscCase::initPrograms(programCollection);
649 
650 	std::ostringstream mesh;
651 	mesh
652 		<< "#version 450\n"
653 		<< "#extension GL_EXT_mesh_shader : enable\n"
654 		<< "\n"
655 		<< "layout(local_size_x=1) in;\n"
656 		<< "layout(points) out;\n"
657 		<< "layout(max_vertices=256, max_primitives=256) out;\n"
658 		<< "\n"
659 		<< "layout (location=0) out perprimitiveEXT vec4 pointColor[];\n"
660 		<< "\n"
661 		<< "void main ()\n"
662 		<< "{\n"
663 		<< "    SetMeshOutputsEXT(1u, 1u);\n"
664 		<< "    pointColor[0] = vec4(0.0f, 1.0f, 1.0f, 1.0f);\n"
665 		<< "    gl_MeshVerticesEXT[0].gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n"
666 		<< "    gl_MeshVerticesEXT[0].gl_PointSize = 1.0f;\n"
667 		<< "    gl_PrimitivePointIndicesEXT[0] = 0;\n"
668 		<< "}\n"
669 		;
670 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
671 }
672 
generateReferenceLevel()673 void SinglePointInstance::generateReferenceLevel ()
674 {
675 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f), m_referenceLevel);
676 
677 	const auto halfWidth	= static_cast<int>(m_params->width / 2u);
678 	const auto halfHeight	= static_cast<int>(m_params->height / 2u);
679 	const auto access		= m_referenceLevel->getAccess();
680 
681 	access.setPixel(tcu::Vec4(0.0f, 1.0f, 1.0f, 1.0f), halfWidth, halfHeight);
682 }
683 
684 // Verify drawing a single line.
685 class SingleLineCase : public MeshShaderMiscCase
686 {
687 public:
SingleLineCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)688 					SingleLineCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
689 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
690 					{}
691 
692 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
693 	TestInstance*	createInstance			(Context& context) const override;
694 };
695 
696 class SingleLineInstance : public MeshShaderMiscInstance
697 {
698 public:
SingleLineInstance(Context & context,const MiscTestParams * params)699 	SingleLineInstance (Context& context, const MiscTestParams* params)
700 		: MeshShaderMiscInstance (context, params)
701 	{}
702 
703 	void	generateReferenceLevel	() override;
704 };
705 
createInstance(Context & context) const706 TestInstance* SingleLineCase::createInstance (Context& context) const
707 {
708 	return new SingleLineInstance (context, m_params.get());
709 }
710 
initPrograms(vk::SourceCollections & programCollection) const711 void SingleLineCase::initPrograms (vk::SourceCollections& programCollection) const
712 {
713 	DE_ASSERT(!m_params->needsTaskShader());
714 
715 	const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
716 
717 	MeshShaderMiscCase::initPrograms(programCollection);
718 
719 	std::ostringstream mesh;
720 	mesh
721 		<< "#version 450\n"
722 		<< "#extension GL_EXT_mesh_shader : enable\n"
723 		<< "\n"
724 		<< "layout(local_size_x=1) in;\n"
725 		<< "layout(lines) out;\n"
726 		<< "layout(max_vertices=256, max_primitives=256) out;\n"
727 		<< "\n"
728 		<< "layout (location=0) out perprimitiveEXT vec4 lineColor[];\n"
729 		<< "\n"
730 		<< "void main ()\n"
731 		<< "{\n"
732 		<< "    SetMeshOutputsEXT(2u, 1u);\n"
733 		<< "    lineColor[0] = vec4(0.0f, 1.0f, 1.0f, 1.0f);\n"
734 		<< "    gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0f, 0.0f, 0.0f, 1.0f);\n"
735 		<< "    gl_MeshVerticesEXT[1].gl_Position = vec4( 1.0f, 0.0f, 0.0f, 1.0f);\n"
736 		<< "    gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uvec2(0u, 1u);\n"
737 		<< "}\n"
738 		;
739 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
740 }
741 
generateReferenceLevel()742 void SingleLineInstance::generateReferenceLevel ()
743 {
744 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f), m_referenceLevel);
745 
746 	const auto iWidth		= static_cast<int>(m_params->width);
747 	const auto halfHeight	= static_cast<int>(m_params->height / 2u);
748 	const auto access		= m_referenceLevel->getAccess();
749 
750 	// Center row.
751 	for (int x = 0; x < iWidth; ++x)
752 		access.setPixel(tcu::Vec4(0.0f, 1.0f, 1.0f, 1.0f), x, halfHeight);
753 }
754 
755 // Verify drawing a single triangle.
756 class SingleTriangleCase : public MeshShaderMiscCase
757 {
758 public:
SingleTriangleCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)759 					SingleTriangleCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
760 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
761 					{}
762 
763 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
764 	TestInstance*	createInstance			(Context& context) const override;
765 };
766 
767 class SingleTriangleInstance : public MeshShaderMiscInstance
768 {
769 public:
SingleTriangleInstance(Context & context,const MiscTestParams * params)770 	SingleTriangleInstance (Context& context, const MiscTestParams* params)
771 		: MeshShaderMiscInstance (context, params)
772 	{}
773 
774 	void	generateReferenceLevel	() override;
775 };
776 
createInstance(Context & context) const777 TestInstance* SingleTriangleCase::createInstance (Context& context) const
778 {
779 	return new SingleTriangleInstance (context, m_params.get());
780 }
781 
initPrograms(vk::SourceCollections & programCollection) const782 void SingleTriangleCase::initPrograms (vk::SourceCollections& programCollection) const
783 {
784 	DE_ASSERT(!m_params->needsTaskShader());
785 
786 	const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
787 
788 	MeshShaderMiscCase::initPrograms(programCollection);
789 
790 	const float halfPixelX = 2.0f / static_cast<float>(m_params->width);
791 	const float halfPixelY = 2.0f / static_cast<float>(m_params->height);
792 
793 	std::ostringstream mesh;
794 	mesh
795 		<< "#version 450\n"
796 		<< "#extension GL_EXT_mesh_shader : enable\n"
797 		<< "\n"
798 		<< "layout(local_size_x=1) in;\n"
799 		<< "layout(triangles) out;\n"
800 		<< "layout(max_vertices=256, max_primitives=256) out;\n"
801 		<< "\n"
802 		<< "layout (location=0) out perprimitiveEXT vec4 triangleColor[];\n"
803 		<< "\n"
804 		<< "void main ()\n"
805 		<< "{\n"
806 		<< "    SetMeshOutputsEXT(3u, 1u);\n"
807 		<< "    triangleColor[0] = vec4(0.0f, 1.0f, 1.0f, 1.0f);\n"
808 		<< "    gl_MeshVerticesEXT[0].gl_Position = vec4(" <<  halfPixelY << ", " << -halfPixelX << ", 0.0f, 1.0f);\n"
809 		<< "    gl_MeshVerticesEXT[1].gl_Position = vec4(" <<  halfPixelY << ", " <<  halfPixelX << ", 0.0f, 1.0f);\n"
810 		<< "    gl_MeshVerticesEXT[2].gl_Position = vec4(" << -halfPixelY << ", 0.0f, 0.0f, 1.0f);\n"
811 		<< "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0u, 1u, 2u);\n"
812 		<< "}\n"
813 		;
814 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
815 }
816 
generateReferenceLevel()817 void SingleTriangleInstance::generateReferenceLevel ()
818 {
819 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f), m_referenceLevel);
820 
821 	const auto halfWidth	= static_cast<int>(m_params->width / 2u);
822 	const auto halfHeight	= static_cast<int>(m_params->height / 2u);
823 	const auto access		= m_referenceLevel->getAccess();
824 
825 	// Single pixel in the center.
826 	access.setPixel(tcu::Vec4(0.0f, 1.0f, 1.0f, 1.0f), halfWidth, halfHeight);
827 }
828 
829 // Verify drawing the maximum number of points.
830 class MaxPointsCase : public MeshShaderMiscCase
831 {
832 public:
MaxPointsCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)833 					MaxPointsCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
834 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
835 					{}
836 
837 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
838 	TestInstance*	createInstance			(Context& context) const override;
839 };
840 
841 class MaxPointsInstance : public MeshShaderMiscInstance
842 {
843 public:
MaxPointsInstance(Context & context,const MiscTestParams * params)844 	MaxPointsInstance (Context& context, const MiscTestParams* params)
845 		: MeshShaderMiscInstance (context, params)
846 	{}
847 
848 	void	generateReferenceLevel	() override;
849 };
850 
createInstance(Context & context) const851 TestInstance* MaxPointsCase::createInstance (Context& context) const
852 {
853 	return new MaxPointsInstance (context, m_params.get());
854 }
855 
initPrograms(vk::SourceCollections & programCollection) const856 void MaxPointsCase::initPrograms (vk::SourceCollections& programCollection) const
857 {
858 	DE_ASSERT(!m_params->needsTaskShader());
859 
860 	const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
861 
862 	MeshShaderMiscCase::initPrograms(programCollection);
863 
864 	// Fill a 16x16 image with 256 points. Each of the 64 local invocations will handle a segment of 4 pixels. 4 segments per row.
865 	DE_ASSERT(m_params->width == 16u && m_params->height == 16u);
866 
867 	std::ostringstream mesh;
868 	mesh
869 		<< "#version 450\n"
870 		<< "#extension GL_EXT_mesh_shader : enable\n"
871 		<< "\n"
872 		<< "layout(local_size_x=8, local_size_y=2, local_size_z=4) in;\n"
873 		<< "layout(points) out;\n"
874 		<< "layout(max_vertices=256, max_primitives=256) out;\n"
875 		<< "\n"
876 		<< "layout (location=0) out perprimitiveEXT vec4 pointColor[];\n"
877 		<< "\n"
878 		<< "void main ()\n"
879 		<< "{\n"
880 		<< "    SetMeshOutputsEXT(256u, 256u);\n"
881 		<< "    uint firstPixel = 4u * gl_LocalInvocationIndex;\n"
882 		<< "    uint row = firstPixel / 16u;\n"
883 		<< "    uint col = firstPixel % 16u;\n"
884 		<< "    float pixSize = 2.0f / 16.0f;\n"
885 		<< "    float yCoord = pixSize * (float(row) + 0.5f) - 1.0f;\n"
886 		<< "    float baseXCoord = pixSize * (float(col) + 0.5f) - 1.0f;\n"
887 		<< "    for (uint i = 0; i < 4u; i++) {\n"
888 		<< "        float xCoord = baseXCoord + pixSize * float(i);\n"
889 		<< "        uint pixId = firstPixel + i;\n"
890 		<< "        gl_MeshVerticesEXT[pixId].gl_Position = vec4(xCoord, yCoord, 0.0f, 1.0f);\n"
891 		<< "        gl_MeshVerticesEXT[pixId].gl_PointSize = 1.0f;\n"
892 		<< "        gl_PrimitivePointIndicesEXT[pixId] = pixId;\n"
893 		<< "        pointColor[pixId] = vec4(((xCoord + 1.0f) / 2.0f), ((yCoord + 1.0f) / 2.0f), 0.0f, 1.0f);\n"
894 		<< "    }\n"
895 		<< "}\n"
896 		;
897 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
898 }
899 
generateReferenceLevel()900 void MaxPointsInstance::generateReferenceLevel ()
901 {
902 	const auto format		= getOutputFormat();
903 	const auto tcuFormat	= mapVkFormat(format);
904 
905 	const auto iWidth		= static_cast<int>(m_params->width);
906 	const auto iHeight		= static_cast<int>(m_params->height);
907 	const auto fWidth		= static_cast<float>(m_params->width);
908 	const auto fHeight		= static_cast<float>(m_params->height);
909 
910 	m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
911 
912 	const auto access		= m_referenceLevel->getAccess();
913 
914 	// Fill with gradient like the shader does.
915 	for (int y = 0; y < iHeight; ++y)
916 	for (int x = 0; x < iWidth; ++x)
917 	{
918 		const tcu::Vec4 color (
919 			((static_cast<float>(x) + 0.5f) / fWidth),
920 			((static_cast<float>(y) + 0.5f) / fHeight),
921 			0.0f, 1.0f);
922 		access.setPixel(color, x, y);
923 	}
924 }
925 
926 // Verify drawing the maximum number of lines.
927 class MaxLinesCase : public MeshShaderMiscCase
928 {
929 public:
MaxLinesCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)930 					MaxLinesCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
931 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
932 					{}
933 
934 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
935 	TestInstance*	createInstance			(Context& context) const override;
936 };
937 
938 class MaxLinesInstance : public MeshShaderMiscInstance
939 {
940 public:
MaxLinesInstance(Context & context,const MiscTestParams * params)941 	MaxLinesInstance (Context& context, const MiscTestParams* params)
942 		: MeshShaderMiscInstance (context, params)
943 	{}
944 
945 	void	generateReferenceLevel	() override;
946 };
947 
createInstance(Context & context) const948 TestInstance* MaxLinesCase::createInstance (Context& context) const
949 {
950 	return new MaxLinesInstance (context, m_params.get());
951 }
952 
initPrograms(vk::SourceCollections & programCollection) const953 void MaxLinesCase::initPrograms (vk::SourceCollections& programCollection) const
954 {
955 	DE_ASSERT(!m_params->needsTaskShader());
956 
957 	const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
958 
959 	MeshShaderMiscCase::initPrograms(programCollection);
960 
961 	// Fill a 1x1020 image with 255 lines, each line being 4 pixels tall. Each invocation will generate ~4 lines.
962 	DE_ASSERT(m_params->width == 1u && m_params->height == 1020u);
963 
964 	std::ostringstream mesh;
965 	mesh
966 		<< "#version 450\n"
967 		<< "#extension GL_EXT_mesh_shader : enable\n"
968 		<< "\n"
969 		<< "layout(local_size_x=4, local_size_y=2, local_size_z=8) in;\n"
970 		<< "layout(lines) out;\n"
971 		<< "layout(max_vertices=256, max_primitives=255) out;\n"
972 		<< "\n"
973 		<< "layout (location=0) out perprimitiveEXT vec4 lineColor[];\n"
974 		<< "\n"
975 		<< "void main ()\n"
976 		<< "{\n"
977 		<< "    SetMeshOutputsEXT(256u, 255u);\n"
978 		<< "    uint firstLine = 4u * gl_LocalInvocationIndex;\n"
979 		<< "    for (uint i = 0u; i < 4u; i++) {\n"
980 		<< "        uint lineId = firstLine + i;\n"
981 		<< "        uint topPixel = 4u * lineId;\n"
982 		<< "        uint bottomPixel = 3u + topPixel;\n"
983 		<< "        if (bottomPixel < 1020u) {\n"
984 		<< "            float bottomCoord = ((float(bottomPixel) + 1.0f) / 1020.0) * 2.0 - 1.0;\n"
985 		<< "            gl_MeshVerticesEXT[lineId + 1u].gl_Position = vec4(0.0, bottomCoord, 0.0f, 1.0f);\n"
986 		<< "            gl_PrimitiveLineIndicesEXT[lineId] = uvec2(lineId, lineId + 1u);\n"
987 		<< "            lineColor[lineId] = vec4(0.0f, 1.0f, float(lineId) / 255.0f, 1.0f);\n"
988 		<< "        } else {\n"
989 		<< "            // The last iteration of the last invocation emits the first point\n"
990 		<< "            gl_MeshVerticesEXT[0].gl_Position = vec4(0.0, -1.0, 0.0f, 1.0f);\n"
991 		<< "        }\n"
992 		<< "    }\n"
993 		<< "}\n"
994 		;
995 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
996 }
997 
generateReferenceLevel()998 void MaxLinesInstance::generateReferenceLevel ()
999 {
1000 	const auto format		= getOutputFormat();
1001 	const auto tcuFormat	= mapVkFormat(format);
1002 
1003 	const auto iWidth		= static_cast<int>(m_params->width);
1004 	const auto iHeight		= static_cast<int>(m_params->height);
1005 
1006 	m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
1007 
1008 	const auto access		= m_referenceLevel->getAccess();
1009 
1010 	// Fill lines, 4 pixels per line.
1011 	const uint32_t kNumLines = 255u;
1012 	const uint32_t kLineHeight = 4u;
1013 
1014 	for (uint32_t i = 0u; i < kNumLines; ++i)
1015 	{
1016 		const tcu::Vec4 color (0.0f, 1.0f, static_cast<float>(i) / static_cast<float>(kNumLines), 1.0f);
1017 		for (uint32_t j = 0u; j < kLineHeight; ++j)
1018 			access.setPixel(color, 0, i*kLineHeight + j);
1019 	}
1020 }
1021 
1022 // Verify drawing the maximum number of triangles.
1023 class MaxTrianglesCase : public MeshShaderMiscCase
1024 {
1025 public:
1026 	struct Params : public MiscTestParams
1027 	{
1028 		tcu::UVec3 localSize;
1029 
Paramsvkt::MeshShader::__anonb99b7fd80111::MaxTrianglesCase::Params1030 		Params (const tcu::UVec3& meshCount_, uint32_t width_, uint32_t height_, const tcu::UVec3& localSize_)
1031 			: MiscTestParams	(tcu::Nothing, meshCount_, width_, height_)
1032 			, localSize			(localSize_)
1033 			{}
1034 	};
1035 
MaxTrianglesCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)1036 					MaxTrianglesCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
1037 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
1038 					{}
1039 
1040 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
1041 	TestInstance*	createInstance			(Context& context) const override;
1042 
1043 	static constexpr uint32_t kNumVertices	= 256u;
1044 	static constexpr uint32_t kNumTriangles	= 254u;
1045 };
1046 
1047 class MaxTrianglesInstance : public MeshShaderMiscInstance
1048 {
1049 public:
MaxTrianglesInstance(Context & context,const MiscTestParams * params)1050 	MaxTrianglesInstance (Context& context, const MiscTestParams* params)
1051 		: MeshShaderMiscInstance (context, params)
1052 	{}
1053 
1054 	void	generateReferenceLevel	() override;
1055 };
1056 
createInstance(Context & context) const1057 TestInstance* MaxTrianglesCase::createInstance (Context& context) const
1058 {
1059 	return new MaxTrianglesInstance (context, m_params.get());
1060 }
1061 
initPrograms(vk::SourceCollections & programCollection) const1062 void MaxTrianglesCase::initPrograms (vk::SourceCollections& programCollection) const
1063 {
1064 	// Default frag shader.
1065 	MeshShaderMiscCase::initPrograms(programCollection);
1066 
1067 	const auto buildOptions	= getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1068 	const auto params		= dynamic_cast<const MaxTrianglesCase::Params*>(m_params.get());
1069 
1070 	DE_ASSERT(params);
1071 	DE_ASSERT(!params->needsTaskShader());
1072 
1073 	const auto&	localSize		= params->localSize;
1074 	const auto	workGroupSize	= (localSize.x() * localSize.y() * localSize.z());
1075 
1076 	DE_ASSERT(kNumVertices % workGroupSize == 0u);
1077 	const auto trianglesPerInvocation = kNumVertices / workGroupSize;
1078 
1079 	// Fill a sufficiently large image with solid color. Generate a quarter of a circle with the center in the top left corner,
1080 	// using a triangle fan that advances from top to bottom. Each invocation will generate ~trianglesPerInvocation triangles.
1081 	std::ostringstream mesh;
1082 	mesh
1083 		<< "#version 450\n"
1084 		<< "#extension GL_EXT_mesh_shader : enable\n"
1085 		<< "\n"
1086 		<< "layout(local_size_x=" << localSize.x() << ", local_size_y=" << localSize.y() << ", local_size_z=" << localSize.z() << ") in;\n"
1087 		<< "layout(triangles) out;\n"
1088 		<< "layout(max_vertices=" << kNumVertices << ", max_primitives=" << kNumTriangles << ") out;\n"
1089 		<< "\n"
1090 		<< "layout (location=0) out perprimitiveEXT vec4 triangleColor[];\n"
1091 		<< "\n"
1092 		<< "const float PI_2 = 1.57079632679489661923;\n"
1093 		<< "const float RADIUS = 4.5;\n"
1094 		<< "\n"
1095 		<< "void main ()\n"
1096 		<< "{\n"
1097 		<< "    const uint trianglesPerInvocation = " << trianglesPerInvocation << "u;\n"
1098 		<< "    const uint numVertices = " << kNumVertices << "u;\n"
1099 		<< "    const uint numTriangles = " << kNumTriangles << "u;\n"
1100 		<< "    const float fNumTriangles = float(numTriangles);\n"
1101 		<< "    SetMeshOutputsEXT(numVertices, numTriangles);\n"
1102 		<< "    uint firstTriangle = trianglesPerInvocation * gl_LocalInvocationIndex;\n"
1103 		<< "    for (uint i = 0u; i < trianglesPerInvocation; i++) {\n"
1104 		<< "        uint triangleId = firstTriangle + i;\n"
1105 		<< "        if (triangleId < numTriangles) {\n"
1106 		<< "            uint vertexId = triangleId + 2u;\n"
1107 		<< "            float angleProportion = float(vertexId - 1u) / fNumTriangles;\n"
1108 		<< "            float angle = PI_2 * angleProportion;\n"
1109 		<< "            float xCoord = cos(angle) * RADIUS - 1.0;\n"
1110 		<< "            float yCoord = sin(angle) * RADIUS - 1.0;\n"
1111 		<< "            gl_MeshVerticesEXT[vertexId].gl_Position = vec4(xCoord, yCoord, 0.0, 1.0);\n"
1112 		<< "            gl_PrimitiveTriangleIndicesEXT[triangleId] = uvec3(0u, triangleId + 1u, triangleId + 2u);\n"
1113 		<< "            triangleColor[triangleId] = vec4(0.0f, 0.0f, 1.0f, 1.0f);\n"
1114 		<< "        } else {\n"
1115 		<< "            // The last iterations of the last invocation emit the first two vertices\n"
1116 		<< "            uint vertexId = triangleId - numTriangles;\n"
1117 		<< "            if (vertexId == 0u) {\n"
1118 		<< "                gl_MeshVerticesEXT[0u].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
1119 		<< "            } else {\n"
1120 		<< "                gl_MeshVerticesEXT[1u].gl_Position = vec4(RADIUS, -1.0, 0.0, 1.0);\n"
1121 		<< "            }\n"
1122 		<< "        }\n"
1123 		<< "    }\n"
1124 		<< "}\n"
1125 		;
1126 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1127 }
1128 
generateReferenceLevel()1129 void MaxTrianglesInstance::generateReferenceLevel ()
1130 {
1131 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
1132 }
1133 
1134 struct LargeWorkGroupParams : public MiscTestParams
1135 {
LargeWorkGroupParamsvkt::MeshShader::__anonb99b7fd80111::LargeWorkGroupParams1136 	LargeWorkGroupParams (const tcu::Maybe<tcu::UVec3>& taskCount_, const tcu::UVec3& meshCount_, uint32_t width_, uint32_t height_, const tcu::UVec3& localInvocations_)
1137 		: MiscTestParams	(taskCount_, meshCount_, width_, height_)
1138 		, localInvocations	(localInvocations_)
1139 	{}
1140 
1141 	tcu::UVec3 localInvocations;
1142 };
1143 
1144 // Large work groups with many threads.
1145 class LargeWorkGroupCase : public MeshShaderMiscCase
1146 {
1147 public:
LargeWorkGroupCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)1148 					LargeWorkGroupCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
1149 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
1150 					{}
1151 
1152 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
1153 	TestInstance*	createInstance			(Context& context) const override;
1154 };
1155 
1156 class LargeWorkGroupInstance : public MeshShaderMiscInstance
1157 {
1158 public:
LargeWorkGroupInstance(Context & context,const MiscTestParams * params)1159 	LargeWorkGroupInstance (Context& context, const MiscTestParams* params)
1160 		: MeshShaderMiscInstance (context, params)
1161 	{}
1162 
1163 	void	generateReferenceLevel	() override;
1164 };
1165 
createInstance(Context & context) const1166 TestInstance* LargeWorkGroupCase::createInstance (Context& context) const
1167 {
1168 	return new LargeWorkGroupInstance(context, m_params.get());
1169 }
1170 
generateReferenceLevel()1171 void LargeWorkGroupInstance::generateReferenceLevel ()
1172 {
1173 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
1174 }
1175 
1176 // 'x', 'y' or 'z' depending on if dim is 0, 1 or 2, respectively.
dimSuffix(int dim)1177 char dimSuffix (int dim)
1178 {
1179 	const std::string suffixes = "xyz";
1180 	DE_ASSERT(dim >= 0 && dim < static_cast<int>(suffixes.size()));
1181 	return suffixes[dim];
1182 }
1183 
initPrograms(vk::SourceCollections & programCollection) const1184 void LargeWorkGroupCase::initPrograms (vk::SourceCollections& programCollection) const
1185 {
1186 	const auto buildOptions	= getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1187 	const auto params		= dynamic_cast<LargeWorkGroupParams*>(m_params.get());
1188 	DE_ASSERT(params);
1189 
1190 	const auto	totalInvocations	= params->localInvocations.x() * params->localInvocations.y() * params->localInvocations.z();
1191 	const auto	useTaskShader		= params->needsTaskShader();
1192 	uint32_t	taskMultiplier		= 1u;
1193 	const auto&	meshCount			= params->meshCount;
1194 	const auto	meshMultiplier		= meshCount.x() * meshCount.y() * meshCount.z();
1195 
1196 	if (useTaskShader)
1197 	{
1198 		const auto dim	= params->taskCount.get();
1199 		taskMultiplier	= dim.x() * dim.y() * dim.z();
1200 	}
1201 
1202 	// Add the frag shader.
1203 	MeshShaderMiscCase::initPrograms(programCollection);
1204 
1205 	std::ostringstream taskData;
1206 	taskData
1207 		<< "struct TaskData {\n"
1208 		<< "    uint parentTask[" << totalInvocations << "];\n"
1209 		<< "};\n"
1210 		<< "taskPayloadSharedEXT TaskData td;\n"
1211 		;
1212 	const auto taskDataStr = taskData.str();
1213 
1214 	const std::string localSizeStr = "layout ("
1215 		"local_size_x=" + std::to_string(params->localInvocations.x()) + ", "
1216 		"local_size_y=" + std::to_string(params->localInvocations.y()) + ", "
1217 		"local_size_z=" + std::to_string(params->localInvocations.z())
1218 		+ ") in;\n"
1219 		;
1220 
1221 	if (useTaskShader)
1222 	{
1223 		std::ostringstream task;
1224 		task
1225 			<< "#version 450\n"
1226 			<< "#extension GL_EXT_mesh_shader : enable\n"
1227 			<< "\n"
1228 			<< localSizeStr
1229 			<< "\n"
1230 			<< taskDataStr
1231 			<< "\n"
1232 			<< "void main () {\n"
1233 			<< "    const uint workGroupIndex = gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupID.z + gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1234 			<< "    td.parentTask[gl_LocalInvocationIndex] = workGroupIndex;\n"
1235 			<< "    EmitMeshTasksEXT(" << meshCount.x() << ", " << meshCount.y() << ", " << meshCount.z() << ");\n"
1236 			<< "}\n"
1237 			;
1238 		programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1239 	}
1240 
1241 	// Needed for the code below to work.
1242 	DE_ASSERT(params->width * params->height == taskMultiplier * meshMultiplier * totalInvocations);
1243 	DE_UNREF(taskMultiplier); // For release builds.
1244 
1245 	// Emit one point per framebuffer pixel. The number of jobs (params->localInvocations in each mesh shader work group, multiplied
1246 	// by the number of mesh work groups emitted by each task work group) must be the same as the total framebuffer size. Calculate
1247 	// a job ID corresponding to the current mesh shader invocation, and assign a pixel position to it. Draw a point at that
1248 	// position.
1249 	std::ostringstream mesh;
1250 	mesh
1251 		<< "#version 450\n"
1252 		<< "#extension GL_EXT_mesh_shader : enable\n"
1253 		<< "\n"
1254 		<< localSizeStr
1255 		<< "layout (points) out;\n"
1256 		<< "layout (max_vertices=" << totalInvocations << ", max_primitives=" << totalInvocations << ") out;\n"
1257 		<< "\n"
1258 		<< (useTaskShader ? taskDataStr : "")
1259 		<< "\n"
1260 		<< "layout (location=0) out perprimitiveEXT vec4 pointColor[];\n"
1261 		<< "\n"
1262 		<< "void main () {\n"
1263 		<< "    uint parentTask = " << (useTaskShader ? "td.parentTask[0]" : "0") << ";\n";
1264 		;
1265 
1266 	if (useTaskShader)
1267 	{
1268 		mesh
1269 			<< "    if (td.parentTask[gl_LocalInvocationIndex] != parentTask || parentTask >= " << taskMultiplier << ") {\n"
1270 			<< "        return;\n"
1271 			<< "    }\n"
1272 			;
1273 	}
1274 
1275 	mesh
1276 		<< "    SetMeshOutputsEXT(" << totalInvocations << ", " << totalInvocations << ");\n"
1277 		<< "    const uint workGroupIndex = gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupID.z + gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1278 		<< "    uint jobId = ((parentTask * " << meshMultiplier << ") + workGroupIndex) * " << totalInvocations << " + gl_LocalInvocationIndex;\n"
1279 		<< "    uint row = jobId / " << params->width << ";\n"
1280 		<< "    uint col = jobId % " << params->width << ";\n"
1281 		<< "    float yCoord = (float(row + 0.5) / " << params->height << ".0) * 2.0 - 1.0;\n"
1282 		<< "    float xCoord = (float(col + 0.5) / " << params->width << ".0) * 2.0 - 1.0;\n"
1283 		<< "    gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(xCoord, yCoord, 0.0, 1.0);\n"
1284 		<< "    gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 1.0;\n"
1285 		<< "    gl_PrimitivePointIndicesEXT[gl_LocalInvocationIndex] = gl_LocalInvocationIndex;\n"
1286 		<< "    vec4 resultColor = vec4(0.0, 0.0, 1.0, 1.0);\n"
1287 		;
1288 
1289 	mesh
1290 		<< "    pointColor[gl_LocalInvocationIndex] = resultColor;\n"
1291 		<< "}\n"
1292 		;
1293 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1294 }
1295 
// Tests that generate no primitives of a given type.
// Output primitive type declared by the generated mesh shaders.
enum class PrimitiveType
{
	POINTS		= 0,
	LINES		= 1,
	TRIANGLES	= 2,
};
1298 
primitiveTypeName(PrimitiveType primitiveType)1299 std::string primitiveTypeName (PrimitiveType primitiveType)
1300 {
1301 	std::string primitiveName;
1302 
1303 	switch (primitiveType)
1304 	{
1305 	case PrimitiveType::POINTS:		primitiveName = "points";		break;
1306 	case PrimitiveType::LINES:		primitiveName = "lines";		break;
1307 	case PrimitiveType::TRIANGLES:	primitiveName = "triangles";	break;
1308 	default: DE_ASSERT(false); break;
1309 	}
1310 
1311 	return primitiveName;
1312 }
1313 
1314 struct NoPrimitivesParams : public MiscTestParams
1315 {
NoPrimitivesParamsvkt::MeshShader::__anonb99b7fd80111::NoPrimitivesParams1316 	NoPrimitivesParams (const tcu::Maybe<tcu::UVec3>& taskCount_, const tcu::UVec3& meshCount_, uint32_t width_, uint32_t height_, PrimitiveType primitiveType_)
1317 		: MiscTestParams	(taskCount_, meshCount_, width_, height_)
1318 		, primitiveType		(primitiveType_)
1319 		{}
1320 
1321 	PrimitiveType primitiveType;
1322 };
1323 
1324 class NoPrimitivesCase : public MeshShaderMiscCase
1325 {
1326 public:
NoPrimitivesCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)1327 					NoPrimitivesCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
1328 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
1329 					{}
1330 
1331 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
1332 	TestInstance*	createInstance			(Context& context) const override;
1333 };
1334 
1335 class NoPrimitivesInstance : public MeshShaderMiscInstance
1336 {
1337 public:
NoPrimitivesInstance(Context & context,const MiscTestParams * params)1338 	NoPrimitivesInstance (Context& context, const MiscTestParams* params)
1339 		: MeshShaderMiscInstance (context, params)
1340 	{}
1341 
1342 	void	generateReferenceLevel	() override;
1343 };
1344 
generateReferenceLevel()1345 void NoPrimitivesInstance::generateReferenceLevel ()
1346 {
1347 	// No primitives: clear color.
1348 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f), m_referenceLevel);
1349 }
1350 
createInstance(Context & context) const1351 TestInstance* NoPrimitivesCase::createInstance (Context& context) const
1352 {
1353 	return new NoPrimitivesInstance(context, m_params.get());
1354 }
1355 
initPrograms(vk::SourceCollections & programCollection) const1356 void NoPrimitivesCase::initPrograms (vk::SourceCollections& programCollection) const
1357 {
1358 	const auto buildOptions	= getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1359 	const auto params		= dynamic_cast<NoPrimitivesParams*>(m_params.get());
1360 
1361 	DE_ASSERT(params);
1362 	DE_ASSERT(!params->needsTaskShader());
1363 
1364 	const auto primitiveName = primitiveTypeName(params->primitiveType);
1365 
1366 	std::ostringstream mesh;
1367 	mesh
1368 		<< "#version 450\n"
1369 		<< "#extension GL_EXT_mesh_shader : enable\n"
1370 		<< "\n"
1371 		<< "layout (local_size_x=128) in;\n"
1372 		<< "layout (" << primitiveName << ") out;\n"
1373 		<< "layout (max_vertices=256, max_primitives=256) out;\n"
1374 		<< "\n"
1375 		<< "layout (location=0) out perprimitiveEXT vec4 primitiveColor[];\n"
1376 		<< "\n"
1377 		<< "void main () {\n"
1378 		<< "    SetMeshOutputsEXT(0u, 0u);\n"
1379 		<< "}\n"
1380 		;
1381 
1382 	MeshShaderMiscCase::initPrograms(programCollection);
1383 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1384 }
1385 
1386 class NoPrimitivesExtraWritesCase : public NoPrimitivesCase
1387 {
1388 public:
NoPrimitivesExtraWritesCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)1389 					NoPrimitivesExtraWritesCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
1390 						: NoPrimitivesCase (testCtx, name, description, std::move(params))
1391 					{}
1392 
1393 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
1394 
1395 	static constexpr uint32_t kLocalInvocations = 128u;
1396 };
1397 
initPrograms(vk::SourceCollections & programCollection) const1398 void NoPrimitivesExtraWritesCase::initPrograms (vk::SourceCollections& programCollection) const
1399 {
1400 	const auto buildOptions	= getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1401 	const auto params		= dynamic_cast<NoPrimitivesParams*>(m_params.get());
1402 
1403 	DE_ASSERT(params);
1404 	DE_ASSERT(m_params->needsTaskShader());
1405 
1406 	std::ostringstream taskData;
1407 	taskData
1408 		<< "struct TaskData {\n"
1409 		<< "    uint localInvocations[" << kLocalInvocations << "];\n"
1410 		<< "};\n"
1411 		<< "taskPayloadSharedEXT TaskData td;\n"
1412 		;
1413 	const auto taskDataStr = taskData.str();
1414 
1415 	std::ostringstream task;
1416 	task
1417 		<< "#version 450\n"
1418 		<< "#extension GL_EXT_mesh_shader : enable\n"
1419 		<< "\n"
1420 		<< "layout (local_size_x=" << kLocalInvocations << ") in;\n"
1421 		<< "\n"
1422 		<< taskDataStr
1423 		<< "\n"
1424 		<< "void main () {\n"
1425 		<< "    td.localInvocations[gl_LocalInvocationIndex] = gl_LocalInvocationIndex;\n"
1426 		<< "    EmitMeshTasksEXT(" << params->meshCount.x() << ", " << params->meshCount.y() << ", " << params->meshCount.z() << ");\n"
1427 		<< "}\n"
1428 		;
1429 	programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1430 
1431 	const auto primitiveName = primitiveTypeName(params->primitiveType);
1432 
1433 	// Otherwise the shader would be illegal.
1434 	DE_ASSERT(kLocalInvocations > 2u);
1435 
1436 	uint32_t maxPrimitives = 0u;
1437 	switch (params->primitiveType)
1438 	{
1439 	case PrimitiveType::POINTS:		maxPrimitives = kLocalInvocations - 0u;	break;
1440 	case PrimitiveType::LINES:		maxPrimitives = kLocalInvocations - 1u;	break;
1441 	case PrimitiveType::TRIANGLES:	maxPrimitives = kLocalInvocations - 2u;	break;
1442 	default: DE_ASSERT(false); break;
1443 	}
1444 
1445 	const std::string pointSizeDecl	= ((params->primitiveType == PrimitiveType::POINTS)
1446 									? "        gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 1.0;\n"
1447 									: "");
1448 
1449 	std::ostringstream mesh;
1450 	mesh
1451 		<< "#version 450\n"
1452 		<< "#extension GL_EXT_mesh_shader : enable\n"
1453 		<< "\n"
1454 		<< "layout (local_size_x=" << kLocalInvocations << ") in;\n"
1455 		<< "layout (" << primitiveName << ") out;\n"
1456 		<< "layout (max_vertices=" << kLocalInvocations << ", max_primitives=" << maxPrimitives << ") out;\n"
1457 		<< "\n"
1458 		<< taskDataStr
1459 		<< "\n"
1460 		<< "layout (location=0) out perprimitiveEXT vec4 primitiveColor[];\n"
1461 		<< "\n"
1462 		<< "shared uint sumOfIds;\n"
1463 		<< "\n"
1464 		<< "const float PI_2 = 1.57079632679489661923;\n"
1465 		<< "const float RADIUS = 1.0f;\n"
1466 		<< "\n"
1467 		<< "void main ()\n"
1468 		<< "{\n"
1469 		<< "    sumOfIds = 0u;\n"
1470 		<< "    memoryBarrierShared();\n"
1471 		<< "    barrier();\n"
1472 		<< "    atomicAdd(sumOfIds, td.localInvocations[gl_LocalInvocationIndex]);\n"
1473 		<< "    memoryBarrierShared();\n"
1474 		<< "    barrier();\n"
1475 		<< "    // This should dynamically give 0\n"
1476 		<< "    uint primitiveCount = sumOfIds - (" << kLocalInvocations * (kLocalInvocations - 1u) / 2u << ");\n"
1477 		<< "    SetMeshOutputsEXT(primitiveCount, primitiveCount);\n"
1478 		<< "\n"
1479 		<< "    // Emit points and primitives to the arrays in any case\n"
1480 		<< "    if (gl_LocalInvocationIndex > 0u) {\n"
1481 		<< "        float proportion = (float(gl_LocalInvocationIndex - 1u) + 0.5f) / float(" << kLocalInvocations << " - 1u);\n"
1482 		<< "        float angle = PI_2 * proportion;\n"
1483 		<< "        float xCoord = cos(angle) * RADIUS - 1.0;\n"
1484 		<< "        float yCoord = sin(angle) * RADIUS - 1.0;\n"
1485 		<< "        gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(xCoord, yCoord, 0.0, 1.0);\n"
1486 		<< pointSizeDecl
1487 		<< "    } else {\n"
1488 		<< "        gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
1489 		<< pointSizeDecl
1490 		<< "    }\n"
1491 		<< "    uint primitiveId = max(gl_LocalInvocationIndex, " << (maxPrimitives - 1u) << ");\n"
1492 		<< "    primitiveColor[primitiveId] = vec4(0.0, 0.0, 1.0, 1.0);\n"
1493 		;
1494 
1495 	if (params->primitiveType == PrimitiveType::POINTS)
1496 		mesh << "    gl_PrimitivePointIndicesEXT[primitiveId] = primitiveId;\n";
1497 	else if (params->primitiveType == PrimitiveType::LINES)
1498 		mesh << "    gl_PrimitiveLineIndicesEXT[primitiveId] = uvec2(primitiveId + 0u, primitiveId + 1u);\n";
1499 	else if (params->primitiveType == PrimitiveType::TRIANGLES)
1500 		mesh << "    gl_PrimitiveTriangleIndicesEXT[primitiveId] = uvec3(0u, primitiveId + 1u, primitiveId + 3u);\n";
1501 	else
1502 		DE_ASSERT(false);
1503 
1504 	mesh << "}\n";
1505 
1506 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1507 
1508 	MeshShaderMiscCase::initPrograms(programCollection);
1509 }
1510 
1511 // Case testing barrier().
1512 class SimpleBarrierCase : public MeshShaderMiscCase
1513 {
1514 public:
SimpleBarrierCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)1515 					SimpleBarrierCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
1516 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
1517 					{}
1518 
1519 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
1520 	TestInstance*	createInstance			(Context& context) const override;
1521 
1522 	static constexpr uint32_t kLocalInvocations = 32u;
1523 };
1524 
1525 class SimpleBarrierInstance : public MeshShaderMiscInstance
1526 {
1527 public:
SimpleBarrierInstance(Context & context,const MiscTestParams * params)1528 	SimpleBarrierInstance (Context& context, const MiscTestParams* params)
1529 		: MeshShaderMiscInstance (context, params)
1530 	{}
1531 
1532 	void	generateReferenceLevel	() override;
1533 };
1534 
createInstance(Context & context) const1535 TestInstance* SimpleBarrierCase::createInstance (Context& context) const
1536 {
1537 	return new SimpleBarrierInstance(context, m_params.get());
1538 }
1539 
generateReferenceLevel()1540 void SimpleBarrierInstance::generateReferenceLevel ()
1541 {
1542 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
1543 }
1544 
initPrograms(vk::SourceCollections & programCollection) const1545 void SimpleBarrierCase::initPrograms (vk::SourceCollections& programCollection) const
1546 {
1547 	const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1548 
1549 	// Generate frag shader.
1550 	MeshShaderMiscCase::initPrograms(programCollection);
1551 
1552 	DE_ASSERT(m_params->meshCount == tcu::UVec3(1u, 1u, 1u));
1553 	DE_ASSERT(m_params->width == 1u && m_params->height == 1u);
1554 
1555 	const std::string taskOK		= "workGroupSize = uvec3(1u, 1u, 1u);\n";
1556 	const std::string taskFAIL		= "workGroupSize = uvec3(0u, 0u, 0u);\n";
1557 
1558 	const std::string meshOK		= "vertPrim = uvec2(1u, 1u);\n";
1559 	const std::string meshFAIL		= "vertPrim = uvec2(0u, 0u);\n";
1560 
1561 	const std::string okStatement	= (m_params->needsTaskShader() ? taskOK : meshOK);
1562 	const std::string failStatement	= (m_params->needsTaskShader() ? taskFAIL : meshFAIL);
1563 
1564 	const std::string	sharedDecl = "shared uint counter;\n\n";
1565 	std::ostringstream	verification;
1566 	verification
1567 		<< "counter = 0;\n"
1568 		<< "memoryBarrierShared();\n"
1569 		<< "barrier();\n"
1570 		<< "atomicAdd(counter, 1u);\n"
1571 		<< "memoryBarrierShared();\n"
1572 		<< "barrier();\n"
1573 		<< "if (gl_LocalInvocationIndex == 0u) {\n"
1574 		<< "    if (counter == " << kLocalInvocations << ") {\n"
1575 		<< "\n"
1576 		<< okStatement
1577 		<< "\n"
1578 		<< "    } else {\n"
1579 		<< "\n"
1580 		<< failStatement
1581 		<< "\n"
1582 		<< "    }\n"
1583 		<< "}\n"
1584 		;
1585 
1586 	// The mesh shader is very similar in both cases, so we use a template.
1587 	std::ostringstream meshTemplateStr;
1588 	meshTemplateStr
1589 		<< "#version 450\n"
1590 		<< "#extension GL_EXT_mesh_shader : enable\n"
1591 		<< "\n"
1592 		<< "layout (local_size_x=${LOCAL_SIZE}) in;\n"
1593 		<< "layout (points) out;\n"
1594 		<< "layout (max_vertices=1, max_primitives=1) out;\n"
1595 		<< "\n"
1596 		<< "layout (location=0) out perprimitiveEXT vec4 primitiveColor[];\n"
1597 		<< "\n"
1598 		<< "${GLOBALS:opt}"
1599 		<< "void main ()\n"
1600 		<< "{\n"
1601 		<< "    uvec2 vertPrim = uvec2(0u, 0u);\n"
1602 		<< "${BODY}"
1603 		<< "    SetMeshOutputsEXT(vertPrim.x, vertPrim.y);\n"
1604 		<< "    if (gl_LocalInvocationIndex == 0u && vertPrim.x > 0u) {\n"
1605 		<< "        gl_MeshVerticesEXT[0].gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
1606 		<< "        gl_MeshVerticesEXT[0].gl_PointSize = 1.0;\n"
1607 		<< "        primitiveColor[0] = vec4(0.0, 0.0, 1.0, 1.0);\n"
1608 		<< "        gl_PrimitivePointIndicesEXT[0] = 0;\n"
1609 		<< "    }\n"
1610 		<< "}\n"
1611 		;
1612 	const tcu::StringTemplate meshTemplate = meshTemplateStr.str();
1613 
1614 	if (m_params->needsTaskShader())
1615 	{
1616 		std::ostringstream task;
1617 		task
1618 			<< "#version 450\n"
1619 			<< "#extension GL_EXT_mesh_shader : enable\n"
1620 			<< "\n"
1621 			<< "layout (local_size_x=" << kLocalInvocations << ") in;\n"
1622 			<< "\n"
1623 			<< sharedDecl
1624 			<< "void main ()\n"
1625 			<< "{\n"
1626 			<< "    uvec3 workGroupSize = uvec3(0u, 0u, 0u);\n"
1627 			<< verification.str()
1628 			<< "    EmitMeshTasksEXT(workGroupSize.x, workGroupSize.y, workGroupSize.z);\n"
1629 			<< "}\n"
1630 			;
1631 
1632 		std::map<std::string, std::string> replacements;
1633 		replacements["LOCAL_SIZE"]	= "1";
1634 		replacements["BODY"]		= meshOK;
1635 
1636 		const auto meshStr = meshTemplate.specialize(replacements);
1637 
1638 		programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1639 		programCollection.glslSources.add("mesh") << glu::MeshSource(meshStr) << buildOptions;
1640 	}
1641 	else
1642 	{
1643 		std::map<std::string, std::string> replacements;
1644 		replacements["LOCAL_SIZE"]	= std::to_string(kLocalInvocations);
1645 		replacements["BODY"]		= verification.str();
1646 		replacements["GLOBALS"]		= sharedDecl;
1647 
1648 		const auto meshStr = meshTemplate.specialize(replacements);
1649 
1650 		programCollection.glslSources.add("mesh") << glu::MeshSource(meshStr) << buildOptions;
1651 	}
1652 }
1653 
// Case testing memoryBarrierShared() and groupMemoryBarrier().
// Selects which of the two GLSL memory barrier functions is used.
enum class MemoryBarrierType
{
	SHARED	= 0,
	GROUP	= 1,
};
1656 
1657 struct MemoryBarrierParams : public MiscTestParams
1658 {
MemoryBarrierParamsvkt::MeshShader::__anonb99b7fd80111::MemoryBarrierParams1659 	MemoryBarrierParams (const tcu::Maybe<tcu::UVec3>& taskCount_, const tcu::UVec3& meshCount_, uint32_t width_, uint32_t height_, MemoryBarrierType memBarrierType_)
1660 		: MiscTestParams	(taskCount_, meshCount_, width_, height_)
1661 		, memBarrierType	(memBarrierType_)
1662 	{}
1663 
1664 	MemoryBarrierType memBarrierType;
1665 
glslFuncvkt::MeshShader::__anonb99b7fd80111::MemoryBarrierParams1666 	std::string glslFunc () const
1667 	{
1668 		std::string funcName;
1669 
1670 		switch (memBarrierType)
1671 		{
1672 		case MemoryBarrierType::SHARED:		funcName = "memoryBarrierShared";	break;
1673 		case MemoryBarrierType::GROUP:		funcName = "groupMemoryBarrier";	break;
1674 		default: DE_ASSERT(false); break;
1675 		}
1676 
1677 		return funcName;
1678 	}
1679 
1680 };
1681 
1682 class MemoryBarrierCase : public MeshShaderMiscCase
1683 {
1684 public:
MemoryBarrierCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)1685 					MemoryBarrierCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
1686 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
1687 					{}
1688 
1689 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
1690 	TestInstance*	createInstance			(Context& context) const override;
1691 
1692 	static constexpr uint32_t kLocalInvocations = 2u;
1693 };
1694 
1695 class MemoryBarrierInstance : public MeshShaderMiscInstance
1696 {
1697 public:
MemoryBarrierInstance(Context & context,const MiscTestParams * params)1698 	MemoryBarrierInstance (Context& context, const MiscTestParams* params)
1699 		: MeshShaderMiscInstance (context, params)
1700 	{}
1701 
1702 	void	generateReferenceLevel	() override;
1703 	bool	verifyResult			(const tcu::ConstPixelBufferAccess& resultAccess) const override;
1704 
1705 protected:
1706 	// Allow two possible outcomes.
1707 	std::unique_ptr<tcu::TextureLevel>	m_referenceLevel2;
1708 };
1709 
createInstance(Context & context) const1710 TestInstance* MemoryBarrierCase::createInstance (Context& context) const
1711 {
1712 	return new MemoryBarrierInstance(context, m_params.get());
1713 }
1714 
generateReferenceLevel()1715 void MemoryBarrierInstance::generateReferenceLevel ()
1716 {
1717 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
1718 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f), m_referenceLevel2);
1719 }
1720 
verifyResult(const tcu::ConstPixelBufferAccess & resultAccess) const1721 bool MemoryBarrierInstance::verifyResult (const tcu::ConstPixelBufferAccess& resultAccess) const
1722 {
1723 	// Any of the two results is considered valid.
1724 	constexpr auto Message		= tcu::TestLog::Message;
1725 	constexpr auto EndMessage	= tcu::TestLog::EndMessage;
1726 
1727 	// Clarify what we are checking in the logs; otherwise, they could be confusing.
1728 	auto& log = m_context.getTestContext().getLog();
1729 	const std::vector<tcu::TextureLevel*> levels = { m_referenceLevel.get(), m_referenceLevel2.get() };
1730 
1731 	bool good = false;
1732 	for (size_t i = 0; i < levels.size(); ++i)
1733 	{
1734 		log << Message << "Comparing result with reference " << i << "..." << EndMessage;
1735 		const auto success = MeshShaderMiscInstance::verifyResult(resultAccess, *levels[i]);
1736 		if (success)
1737 		{
1738 			log << Message << "Match! The test has passed" << EndMessage;
1739 			good = true;
1740 			break;
1741 		}
1742 	}
1743 
1744 	return good;
1745 }
1746 
initPrograms(vk::SourceCollections & programCollection) const1747 void MemoryBarrierCase::initPrograms (vk::SourceCollections& programCollection) const
1748 {
1749 	const auto buildOptions	= getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1750 	const auto params		= dynamic_cast<MemoryBarrierParams*>(m_params.get());
1751 	DE_ASSERT(params);
1752 
1753 	// Generate frag shader.
1754 	MeshShaderMiscCase::initPrograms(programCollection);
1755 
1756 	DE_ASSERT(params->meshCount == tcu::UVec3(1u, 1u, 1u));
1757 	DE_ASSERT(params->width == 1u && params->height == 1u);
1758 
1759 	const bool taskShader = params->needsTaskShader();
1760 
1761 	const std::string	taskDataDecl	= "struct TaskData { float blue; }; taskPayloadSharedEXT TaskData td;\n\n";
1762 	const auto			barrierFunc		= params->glslFunc();
1763 
1764 	const std::string taskAction	= "td.blue = float(iterations % 2u);\nworkGroupSize = uvec3(1u, 1u, 1u);\n";
1765 	const std::string meshAction	= "vertPrim = uvec2(1u, 1u);\n";
1766 	const std::string action		= (taskShader ? taskAction : meshAction);
1767 
1768 	const std::string	sharedDecl = "shared uint flags[2];\n\n";
1769 	std::ostringstream	verification;
1770 	verification
1771 		<< "flags[gl_LocalInvocationIndex] = 0u;\n"
1772 		<< "barrier();\n"
1773 		<< "flags[gl_LocalInvocationIndex] = 1u;\n"
1774 		<<  barrierFunc << "();\n"
1775 		<< "uint otherInvocation = 1u - gl_LocalInvocationIndex;\n"
1776 		<< "uint iterations = 0u;\n"
1777 		<< "while (flags[otherInvocation] != 1u) {\n"
1778 		<< "    iterations++;\n"
1779 		<< "}\n"
1780 		<< "if (gl_LocalInvocationIndex == 0u) {\n"
1781 		<< "\n"
1782 		<< action
1783 		<< "\n"
1784 		<< "}\n"
1785 		;
1786 
1787 	// The mesh shader is very similar in both cases, so we use a template.
1788 	std::ostringstream meshTemplateStr;
1789 	meshTemplateStr
1790 		<< "#version 450\n"
1791 		<< "#extension GL_EXT_mesh_shader : enable\n"
1792 		<< "\n"
1793 		<< "layout (local_size_x=${LOCAL_SIZE}) in;\n"
1794 		<< "layout (points) out;\n"
1795 		<< "layout (max_vertices=1, max_primitives=1) out;\n"
1796 		<< "\n"
1797 		<< "layout (location=0) out perprimitiveEXT vec4 primitiveColor[];\n"
1798 		<< "\n"
1799 		<< "${GLOBALS}"
1800 		<< "void main ()\n"
1801 		<< "{\n"
1802 		<< "    uvec2 vertPrim = uvec2(0u, 0u);\n"
1803 		<< "${BODY}"
1804 		<< "    SetMeshOutputsEXT(vertPrim.x, vertPrim.y);\n"
1805 		<< "    if (gl_LocalInvocationIndex == 0u && vertPrim.x > 0u) {\n"
1806 		<< "        gl_MeshVerticesEXT[0].gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
1807 		<< "        gl_MeshVerticesEXT[0].gl_PointSize = 1.0;\n"
1808 		<< "        primitiveColor[0] = vec4(0.0, 0.0, ${BLUE}, 1.0);\n"
1809 		<< "        gl_PrimitivePointIndicesEXT[0] = 0;\n"
1810 		<< "    }\n"
1811 		<< "}\n"
1812 		;
1813 	const tcu::StringTemplate meshTemplate = meshTemplateStr.str();
1814 
1815 	if (params->needsTaskShader())
1816 	{
1817 		std::ostringstream task;
1818 		task
1819 			<< "#version 450\n"
1820 			<< "#extension GL_EXT_mesh_shader : enable\n"
1821 			<< "\n"
1822 			<< "layout (local_size_x=" << kLocalInvocations << ") in;\n"
1823 			<< "\n"
1824 			<< sharedDecl
1825 			<< taskDataDecl
1826 			<< "void main ()\n"
1827 			<< "{\n"
1828 			<< "    uvec3 workGroupSize = uvec3(0u, 0u, 0u);\n"
1829 			<< verification.str()
1830 			<< "    EmitMeshTasksEXT(workGroupSize.x, workGroupSize.y, workGroupSize.z);\n"
1831 			<< "}\n"
1832 			;
1833 
1834 		std::map<std::string, std::string> replacements;
1835 		replacements["LOCAL_SIZE"]	= "1";
1836 		replacements["BODY"]		= meshAction;
1837 		replacements["GLOBALS"]		= taskDataDecl;
1838 		replacements["BLUE"]		= "td.blue";
1839 
1840 		const auto meshStr = meshTemplate.specialize(replacements);
1841 
1842 		programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1843 		programCollection.glslSources.add("mesh") << glu::MeshSource(meshStr) << buildOptions;
1844 	}
1845 	else
1846 	{
1847 		std::map<std::string, std::string> replacements;
1848 		replacements["LOCAL_SIZE"]	= std::to_string(kLocalInvocations);
1849 		replacements["BODY"]		= verification.str();
1850 		replacements["GLOBALS"]		= sharedDecl;
1851 		replacements["BLUE"]		= "float(iterations % 2u)";
1852 
1853 		const auto meshStr = meshTemplate.specialize(replacements);
1854 
1855 		programCollection.glslSources.add("mesh") << glu::MeshSource(meshStr) << buildOptions;
1856 	}
1857 }
1858 
1859 // Test the task payload can be read by all invocations in the work group.
1860 class PayloadReadCase : public MeshShaderMiscCase
1861 {
1862 public:
PayloadReadCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)1863 					PayloadReadCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
1864 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
1865 					{}
1866 
1867 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
1868 	TestInstance*	createInstance			(Context& context) const override;
1869 
1870 	static constexpr uint32_t kLocalInvocations = 128u;
1871 };
1872 
1873 class PayloadReadInstance : public MeshShaderMiscInstance
1874 {
1875 public:
PayloadReadInstance(Context & context,const MiscTestParams * params)1876 	PayloadReadInstance (Context& context, const MiscTestParams* params)
1877 		: MeshShaderMiscInstance (context, params)
1878 	{}
1879 
1880 	void	generateReferenceLevel	() override;
1881 };
1882 
createInstance(Context & context) const1883 TestInstance* PayloadReadCase::createInstance (Context &context) const
1884 {
1885 	return new PayloadReadInstance(context, m_params.get());
1886 }
1887 
initPrograms(vk::SourceCollections & programCollection) const1888 void PayloadReadCase::initPrograms (vk::SourceCollections &programCollection) const
1889 {
1890 	const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1891 
1892 	// Add default fragment shader.
1893 	MeshShaderMiscCase::initPrograms(programCollection);
1894 
1895 	std::ostringstream taskPayload;
1896 	taskPayload
1897 		<< "struct TaskData {\n"
1898 		<< "    uint verificationCodes[" << kLocalInvocations << "];\n"
1899 		<< "    vec4 color;\n"
1900 		<< "};\n"
1901 		<< "taskPayloadSharedEXT TaskData td;\n"
1902 		;
1903 	const std::string taskPayloadDecl = taskPayload.str();
1904 
1905 	DE_ASSERT(m_params->needsTaskShader());
1906 
1907 	const auto& meshCount = m_params->meshCount;
1908 	DE_ASSERT(meshCount.x() == 1u && meshCount.y() == 1u && meshCount.z() == 1u);
1909 
1910 	const auto kLocalInvocations2 = kLocalInvocations * 2u;
1911 
1912 	std::ostringstream task;
1913 	task
1914 		<< "#version 450\n"
1915 		<< "#extension GL_EXT_mesh_shader : enable\n"
1916 		<< "\n"
1917 		<< "layout (local_size_x=" << kLocalInvocations << ") in;\n"
1918 		<< "\n"
1919 		<< taskPayloadDecl
1920 		<< "shared uint verificationOK[" << kLocalInvocations << "];\n"
1921 		<< "\n"
1922 		<< "void main ()\n"
1923 		<< "{\n"
1924 		<< "    td.verificationCodes[gl_LocalInvocationIndex] = (" << kLocalInvocations2 << " - gl_LocalInvocationIndex);\n"
1925 		<< "    memoryBarrierShared();\n"
1926 		<< "    barrier();\n"
1927 		// Verify all codes from all invocations.
1928 		<< "    uint verificationResult = 1u;\n"
1929 		<< "    for (uint i = 0u; i < " << kLocalInvocations << "; ++i) {\n"
1930 		<< "        if (td.verificationCodes[i] != (" << kLocalInvocations2 << " - i)) {\n"
1931 		<< "            verificationResult = 0u;\n"
1932 		<< "            break;\n"
1933 		<< "        }\n"
1934 		<< "    }\n"
1935 		<< "    verificationOK[gl_LocalInvocationIndex] = verificationResult;\n"
1936 		<< "    memoryBarrierShared();\n"
1937 		<< "    barrier();\n"
1938 		// Check all verifications were OK (from the first invocation).
1939 		<< "    if (gl_LocalInvocationIndex == 0u) {\n"
1940 		<< "        vec4 color = vec4(0.0, 0.0, 1.0, 1.0);\n"
1941 		<< "        for (uint i = 0u; i < " << kLocalInvocations << "; ++i) {\n"
1942 		<< "            if (verificationOK[i] == 0u) {\n"
1943 		<< "                color = vec4(0.0, 0.0, 0.0, 1.0);\n"
1944 		<< "            }\n"
1945 		<< "        }\n"
1946 		<< "        td.color = color;\n"
1947 		<< "    }\n"
1948 		<< "    EmitMeshTasksEXT(" << meshCount.x() << ", " << meshCount.y() << ", " << meshCount.z() << ");\n"
1949 		<< "}\n"
1950 		;
1951 	programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1952 
1953 	std::ostringstream mesh;
1954 	mesh
1955 		<< "#version 450\n"
1956 		<< "#extension GL_EXT_mesh_shader : enable\n"
1957 		<< "\n"
1958 		<< "layout (local_size_x=1) in;\n"
1959 		<< "layout (triangles) out;\n"
1960 		<< "layout (max_vertices=3, max_primitives=1) out;\n"
1961 		<< "\n"
1962 		<< "layout (location=0) out perprimitiveEXT vec4 primitiveColor[];\n"
1963 		<< taskPayloadDecl
1964 		<< "\n"
1965 		<< "void main ()\n"
1966 		<< "{\n"
1967 		// Verify data one more time from the mesh shader invocation.
1968 		<< "    uint verificationResult = 1u;\n"
1969 		<< "    for (uint i = 0u; i < " << kLocalInvocations << "; ++i) {\n"
1970 		<< "        if (td.verificationCodes[i] != (" << kLocalInvocations2 << " - i)) {\n"
1971 		<< "            verificationResult = 0u;\n"
1972 		<< "            break;\n"
1973 		<< "        }\n"
1974 		<< "    }\n"
1975 		<< "    const vec4 finalColor = ((verificationResult == 0u) ? vec4(0.0, 0.0, 0.0, 1.0) : td.color);\n"
1976 		<< "\n"
1977 		<< "    SetMeshOutputsEXT(3u, 1u);\n"
1978 		<< "\n"
1979 		<< "    gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
1980 		<< "    gl_MeshVerticesEXT[1].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
1981 		<< "    gl_MeshVerticesEXT[2].gl_Position = vec4(-1.0,  3.0, 0.0, 1.0);\n"
1982 		<< "\n"
1983 		<< "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n"
1984 		<< "    primitiveColor[0] = finalColor;\n"
1985 		<< "}\n"
1986 		;
1987 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1988 }
1989 
generateReferenceLevel()1990 void PayloadReadInstance::generateReferenceLevel ()
1991 {
1992 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
1993 }
1994 
1995 // Test with custom per-vertex and per-primitive attributes of different types.
1996 class CustomAttributesCase : public MeshShaderMiscCase
1997 {
1998 public:
CustomAttributesCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)1999 					CustomAttributesCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
2000 						: MeshShaderMiscCase(testCtx, name, description, std::move(params)) {}
~CustomAttributesCase(void)2001 	virtual			~CustomAttributesCase		(void) {}
2002 
2003 	TestInstance*	createInstance				(Context& context) const override;
2004 	void			checkSupport				(Context& context) const override;
2005 	void			initPrograms				(vk::SourceCollections& programCollection) const override;
2006 };
2007 
2008 class CustomAttributesInstance : public MeshShaderMiscInstance
2009 {
2010 public:
CustomAttributesInstance(Context & context,const MiscTestParams * params)2011 						CustomAttributesInstance	(Context& context, const MiscTestParams* params)
2012 							: MeshShaderMiscInstance(context, params) {}
~CustomAttributesInstance(void)2013 	virtual				~CustomAttributesInstance	(void) {}
2014 
2015 	void				generateReferenceLevel		() override;
2016 	tcu::TestStatus		iterate						(void) override;
2017 };
2018 
createInstance(Context & context) const2019 TestInstance* CustomAttributesCase::createInstance (Context& context) const
2020 {
2021 	return new CustomAttributesInstance(context, m_params.get());
2022 }
2023 
checkSupport(Context & context) const2024 void CustomAttributesCase::checkSupport (Context& context) const
2025 {
2026 	MeshShaderMiscCase::checkSupport(context);
2027 
2028 	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_MULTI_VIEWPORT);
2029 	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_CLIP_DISTANCE);
2030 }
2031 
initPrograms(vk::SourceCollections & programCollection) const2032 void CustomAttributesCase::initPrograms (vk::SourceCollections& programCollection) const
2033 {
2034 	const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
2035 
2036 	std::ostringstream frag;
2037 	frag
2038 		<< "#version 450\n"
2039 		<< "#extension GL_EXT_mesh_shader : enable\n"
2040 		<< "\n"
2041 		<< "layout (location=0) in vec4 customAttribute1;\n"
2042 		<< "layout (location=1) in flat float customAttribute2;\n"
2043 		<< "layout (location=2) in flat int customAttribute3;\n"
2044 		<< "\n"
2045 		<< "layout (location=3) in perprimitiveEXT flat uvec4 customAttribute4;\n"
2046 		<< "layout (location=4) in perprimitiveEXT float customAttribute5;\n"
2047 		<< "\n"
2048 		<< "layout (location=0) out vec4 outColor;\n"
2049 		<< "\n"
2050 		<< "void main ()\n"
2051 		<< "{\n"
2052 		<< "    bool goodPrimitiveID = (gl_PrimitiveID == 1000 || gl_PrimitiveID == 1001);\n"
2053 		<< "    bool goodViewportIndex = (gl_ViewportIndex == 1);\n"
2054 		<< "    bool goodCustom1 = (customAttribute1.x >= 0.25 && customAttribute1.x <= 0.5 &&\n"
2055 		<< "                        customAttribute1.y >= 0.5  && customAttribute1.y <= 1.0 &&\n"
2056 		<< "                        customAttribute1.z >= 10.0 && customAttribute1.z <= 20.0 &&\n"
2057 		<< "                        customAttribute1.w == 3.0);\n"
2058 		<< "    bool goodCustom2 = (customAttribute2 == 1.0 || customAttribute2 == 2.0);\n"
2059 		<< "    bool goodCustom3 = (customAttribute3 == 3 || customAttribute3 == 4);\n"
2060 		<< "    bool goodCustom4 = ((gl_PrimitiveID == 1000 && customAttribute4 == uvec4(100, 101, 102, 103)) ||\n"
2061 		<< "                        (gl_PrimitiveID == 1001 && customAttribute4 == uvec4(200, 201, 202, 203)));\n"
2062 		<< "    bool goodCustom5 = ((gl_PrimitiveID == 1000 && customAttribute5 == 6.0) ||\n"
2063 		<< "                        (gl_PrimitiveID == 1001 && customAttribute5 == 7.0));\n"
2064 		<< "    \n"
2065 		<< "    if (goodPrimitiveID && goodViewportIndex && goodCustom1 && goodCustom2 && goodCustom3 && goodCustom4 && goodCustom5) {\n"
2066 		<< "        outColor = vec4(0.0, 0.0, 1.0, 1.0);\n"
2067 		<< "    } else {\n"
2068 		<< "        outColor = vec4(0.0, 0.0, 0.0, 1.0);\n"
2069 		<< "    }\n"
2070 		<< "}\n"
2071 		;
2072 	programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str()) << buildOptions;
2073 
2074 	std::ostringstream pvdDataDeclStream;
2075 	pvdDataDeclStream
2076 		<< "    vec4 positions[4];\n"
2077 		<< "    float pointSizes[4];\n"
2078 		<< "    float clipDistances[4];\n"
2079 		<< "    vec4 custom1[4];\n"
2080 		<< "    float custom2[4];\n"
2081 		<< "    int custom3[4];\n"
2082 		;
2083 	const auto pvdDataDecl = pvdDataDeclStream.str();
2084 
2085 	std::ostringstream ppdDataDeclStream;
2086 	ppdDataDeclStream
2087 		<< "    int primitiveIds[2];\n"
2088 		<< "    int viewportIndices[2];\n"
2089 		<< "    uvec4 custom4[2];\n"
2090 		<< "    float custom5[2];\n"
2091 		;
2092 	const auto ppdDataDecl = ppdDataDeclStream.str();
2093 
2094 	std::ostringstream bindingsDeclStream;
2095 	bindingsDeclStream
2096 		<< "layout (set=0, binding=0, std430) buffer PerVertexData {\n"
2097 		<< pvdDataDecl
2098 		<< "} pvd;\n"
2099 		<< "layout (set=0, binding=1) uniform PerPrimitiveData {\n"
2100 		<< ppdDataDecl
2101 		<< "} ppd;\n"
2102 		<< "\n"
2103 		;
2104 	const auto bindingsDecl = bindingsDeclStream.str();
2105 
2106 	std::ostringstream taskDataStream;
2107 	taskDataStream
2108 		<< "struct TaskData {\n"
2109 		<< pvdDataDecl
2110 		<< ppdDataDecl
2111 		<< "};\n"
2112 		<< "taskPayloadSharedEXT TaskData td;\n"
2113 		<< "\n"
2114 		;
2115 	const auto taskDataDecl = taskDataStream.str();
2116 
2117 	const auto taskShader = m_params->needsTaskShader();
2118 
2119 	const auto meshPvdPrefix = (taskShader ? "td" : "pvd");
2120 	const auto meshPpdPrefix = (taskShader ? "td" : "ppd");
2121 
2122 	std::ostringstream mesh;
2123 	mesh
2124 		<< "#version 450\n"
2125 		<< "#extension GL_EXT_mesh_shader : enable\n"
2126 		<< "\n"
2127 		<< "layout (local_size_x=1) in;\n"
2128 		<< "layout (max_primitives=2, max_vertices=4) out;\n"
2129 		<< "layout (triangles) out;\n"
2130 		<< "\n"
2131 		<< "out gl_MeshPerVertexEXT {\n"
2132 		<< "    vec4  gl_Position;\n"
2133 		<< "    float gl_PointSize;\n"
2134 		<< "    float gl_ClipDistance[1];\n"
2135 		<< "} gl_MeshVerticesEXT[];\n"
2136 		<< "\n"
2137 		<< "layout (location=0) out vec4 customAttribute1[];\n"
2138 		<< "layout (location=1) out flat float customAttribute2[];\n"
2139 		<< "layout (location=2) out int customAttribute3[];\n"
2140 		<< "\n"
2141 		<< "layout (location=3) out perprimitiveEXT uvec4 customAttribute4[];\n"
2142 		<< "layout (location=4) out perprimitiveEXT float customAttribute5[];\n"
2143 		<< "\n"
2144 		<< "out perprimitiveEXT gl_MeshPerPrimitiveEXT {\n"
2145 		<< "  int gl_PrimitiveID;\n"
2146 		<< "  int gl_ViewportIndex;\n"
2147 		<< "} gl_MeshPrimitivesEXT[];\n"
2148 		<< "\n"
2149 		<< (taskShader ? taskDataDecl : bindingsDecl)
2150 		<< "void main ()\n"
2151 		<< "{\n"
2152 		<< "    SetMeshOutputsEXT(4u, 2u);\n"
2153 		<< "\n"
2154 		<< "    gl_MeshVerticesEXT[0].gl_Position = " << meshPvdPrefix << ".positions[0]; //vec4(-1.0, -1.0, 0.0, 1.0)\n"
2155 		<< "    gl_MeshVerticesEXT[1].gl_Position = " << meshPvdPrefix << ".positions[1]; //vec4( 1.0, -1.0, 0.0, 1.0)\n"
2156 		<< "    gl_MeshVerticesEXT[2].gl_Position = " << meshPvdPrefix << ".positions[2]; //vec4(-1.0,  1.0, 0.0, 1.0)\n"
2157 		<< "    gl_MeshVerticesEXT[3].gl_Position = " << meshPvdPrefix << ".positions[3]; //vec4( 1.0,  1.0, 0.0, 1.0)\n"
2158 		<< "\n"
2159 		<< "    gl_MeshVerticesEXT[0].gl_PointSize = " << meshPvdPrefix << ".pointSizes[0]; //1.0\n"
2160 		<< "    gl_MeshVerticesEXT[1].gl_PointSize = " << meshPvdPrefix << ".pointSizes[1]; //1.0\n"
2161 		<< "    gl_MeshVerticesEXT[2].gl_PointSize = " << meshPvdPrefix << ".pointSizes[2]; //1.0\n"
2162 		<< "    gl_MeshVerticesEXT[3].gl_PointSize = " << meshPvdPrefix << ".pointSizes[3]; //1.0\n"
2163 		<< "\n"
2164 		<< "    // Remove geometry on the right side.\n"
2165 		<< "    gl_MeshVerticesEXT[0].gl_ClipDistance[0] = " << meshPvdPrefix << ".clipDistances[0]; // 1.0\n"
2166 		<< "    gl_MeshVerticesEXT[1].gl_ClipDistance[0] = " << meshPvdPrefix << ".clipDistances[1]; //-1.0\n"
2167 		<< "    gl_MeshVerticesEXT[2].gl_ClipDistance[0] = " << meshPvdPrefix << ".clipDistances[2]; // 1.0\n"
2168 		<< "    gl_MeshVerticesEXT[3].gl_ClipDistance[0] = " << meshPvdPrefix << ".clipDistances[3]; //-1.0\n"
2169 		<< "    \n"
2170 		<< "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n"
2171 		<< "    gl_PrimitiveTriangleIndicesEXT[1] = uvec3(2, 3, 1);\n"
2172 		<< "\n"
2173 		<< "    gl_MeshPrimitivesEXT[0].gl_PrimitiveID = " << meshPpdPrefix << ".primitiveIds[0]; //1000\n"
2174 		<< "    gl_MeshPrimitivesEXT[1].gl_PrimitiveID = " << meshPpdPrefix << ".primitiveIds[1]; //1001\n"
2175 		<< "\n"
2176 		<< "    gl_MeshPrimitivesEXT[0].gl_ViewportIndex = " << meshPpdPrefix << ".viewportIndices[0]; //1\n"
2177 		<< "    gl_MeshPrimitivesEXT[1].gl_ViewportIndex = " << meshPpdPrefix << ".viewportIndices[1]; //1\n"
2178 		<< "\n"
2179 		<< "    // Custom per-vertex attributes\n"
2180 		<< "    customAttribute1[0] = " << meshPvdPrefix << ".custom1[0]; //vec4(0.25, 0.5, 10.0, 3.0)\n"
2181 		<< "    customAttribute1[1] = " << meshPvdPrefix << ".custom1[1]; //vec4(0.25, 1.0, 20.0, 3.0)\n"
2182 		<< "    customAttribute1[2] = " << meshPvdPrefix << ".custom1[2]; //vec4( 0.5, 0.5, 20.0, 3.0)\n"
2183 		<< "    customAttribute1[3] = " << meshPvdPrefix << ".custom1[3]; //vec4( 0.5, 1.0, 10.0, 3.0)\n"
2184 		<< "\n"
2185 		<< "    customAttribute2[0] = " << meshPvdPrefix << ".custom2[0]; //1.0f\n"
2186 		<< "    customAttribute2[1] = " << meshPvdPrefix << ".custom2[1]; //1.0f\n"
2187 		<< "    customAttribute2[2] = " << meshPvdPrefix << ".custom2[2]; //2.0f\n"
2188 		<< "    customAttribute2[3] = " << meshPvdPrefix << ".custom2[3]; //2.0f\n"
2189 		<< "\n"
2190 		<< "    customAttribute3[0] = " << meshPvdPrefix << ".custom3[0]; //3\n"
2191 		<< "    customAttribute3[1] = " << meshPvdPrefix << ".custom3[1]; //3\n"
2192 		<< "    customAttribute3[2] = " << meshPvdPrefix << ".custom3[2]; //4\n"
2193 		<< "    customAttribute3[3] = " << meshPvdPrefix << ".custom3[3]; //4\n"
2194 		<< "\n"
2195 		<< "    // Custom per-primitive attributes.\n"
2196 		<< "    customAttribute4[0] = " << meshPpdPrefix << ".custom4[0]; //uvec4(100, 101, 102, 103)\n"
2197 		<< "    customAttribute4[1] = " << meshPpdPrefix << ".custom4[1]; //uvec4(200, 201, 202, 203)\n"
2198 		<< "\n"
2199 		<< "    customAttribute5[0] = " << meshPpdPrefix << ".custom5[0]; //6.0\n"
2200 		<< "    customAttribute5[1] = " << meshPpdPrefix << ".custom5[1]; //7.0\n"
2201 		<< "}\n"
2202 		;
2203 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
2204 
2205 	if (taskShader)
2206 	{
2207 		const auto& meshCount = m_params->meshCount;
2208 		std::ostringstream task;
2209 		task
2210 			<< "#version 450\n"
2211 			<< "#extension GL_EXT_mesh_shader : enable\n"
2212 			<< "\n"
2213 			<< taskDataDecl
2214 			<< bindingsDecl
2215 			<< "void main ()\n"
2216 			<< "{\n"
2217 			<< "    td.positions[0] = pvd.positions[0];\n"
2218 			<< "    td.positions[1] = pvd.positions[1];\n"
2219 			<< "    td.positions[2] = pvd.positions[2];\n"
2220 			<< "    td.positions[3] = pvd.positions[3];\n"
2221 			<< "\n"
2222 			<< "    td.pointSizes[0] = pvd.pointSizes[0];\n"
2223 			<< "    td.pointSizes[1] = pvd.pointSizes[1];\n"
2224 			<< "    td.pointSizes[2] = pvd.pointSizes[2];\n"
2225 			<< "    td.pointSizes[3] = pvd.pointSizes[3];\n"
2226 			<< "\n"
2227 			<< "    td.clipDistances[0] = pvd.clipDistances[0];\n"
2228 			<< "    td.clipDistances[1] = pvd.clipDistances[1];\n"
2229 			<< "    td.clipDistances[2] = pvd.clipDistances[2];\n"
2230 			<< "    td.clipDistances[3] = pvd.clipDistances[3];\n"
2231 			<< "\n"
2232 			<< "    td.custom1[0] = pvd.custom1[0];\n"
2233 			<< "    td.custom1[1] = pvd.custom1[1];\n"
2234 			<< "    td.custom1[2] = pvd.custom1[2];\n"
2235 			<< "    td.custom1[3] = pvd.custom1[3];\n"
2236 			<< "\n"
2237 			<< "    td.custom2[0] = pvd.custom2[0];\n"
2238 			<< "    td.custom2[1] = pvd.custom2[1];\n"
2239 			<< "    td.custom2[2] = pvd.custom2[2];\n"
2240 			<< "    td.custom2[3] = pvd.custom2[3];\n"
2241 			<< "\n"
2242 			<< "    td.custom3[0] = pvd.custom3[0];\n"
2243 			<< "    td.custom3[1] = pvd.custom3[1];\n"
2244 			<< "    td.custom3[2] = pvd.custom3[2];\n"
2245 			<< "    td.custom3[3] = pvd.custom3[3];\n"
2246 			<< "\n"
2247 			<< "    td.primitiveIds[0] = ppd.primitiveIds[0];\n"
2248 			<< "    td.primitiveIds[1] = ppd.primitiveIds[1];\n"
2249 			<< "\n"
2250 			<< "    td.viewportIndices[0] = ppd.viewportIndices[0];\n"
2251 			<< "    td.viewportIndices[1] = ppd.viewportIndices[1];\n"
2252 			<< "\n"
2253 			<< "    td.custom4[0] = ppd.custom4[0];\n"
2254 			<< "    td.custom4[1] = ppd.custom4[1];\n"
2255 			<< "\n"
2256 			<< "    td.custom5[0] = ppd.custom5[0];\n"
2257 			<< "    td.custom5[1] = ppd.custom5[1];\n"
2258 			<< "\n"
2259 			<< "    EmitMeshTasksEXT(" << meshCount.x() << ", " << meshCount.y() << ", " << meshCount.z() << ");\n"
2260 			<< "}\n"
2261 			;
2262 		programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
2263 	}
2264 }
2265 
generateReferenceLevel()2266 void CustomAttributesInstance::generateReferenceLevel ()
2267 {
2268 	const auto format		= getOutputFormat();
2269 	const auto tcuFormat	= mapVkFormat(format);
2270 
2271 	const auto iWidth		= static_cast<int>(m_params->width);
2272 	const auto iHeight		= static_cast<int>(m_params->height);
2273 
2274 	const auto halfWidth	= iWidth / 2;
2275 	const auto halfHeight	= iHeight / 2;
2276 
2277 	m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
2278 
2279 	const auto access		= m_referenceLevel->getAccess();
2280 	const auto clearColor	= tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f);
2281 	const auto blueColor	= tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f);
2282 
2283 	tcu::clear(access, clearColor);
2284 
2285 	// Fill the top left quarter.
2286 	for (int y = 0; y < halfWidth; ++y)
2287 	for (int x = 0; x < halfHeight; ++x)
2288 	{
2289 		access.setPixel(blueColor, x, y);
2290 	}
2291 }
2292 
iterate()2293 tcu::TestStatus CustomAttributesInstance::iterate ()
2294 {
2295 	struct PerVertexData
2296 	{
2297 		tcu::Vec4	positions[4];
2298 		float		pointSizes[4];
2299 		float		clipDistances[4];
2300 		tcu::Vec4	custom1[4];
2301 		float		custom2[4];
2302 		int32_t		custom3[4];
2303 	};
2304 
2305 	struct PerPrimitiveData
2306 	{
2307 		// Note some of these are declared as vectors to match the std140 layout.
2308 		tcu::IVec4	primitiveIds[2];
2309 		tcu::IVec4	viewportIndices[2];
2310 		tcu::UVec4	custom4[2];
2311 		tcu::Vec4	custom5[2];
2312 	};
2313 
2314 	const auto&		vkd			= m_context.getDeviceInterface();
2315 	const auto		device		= m_context.getDevice();
2316 	auto&			alloc		= m_context.getDefaultAllocator();
2317 	const auto		queueIndex	= m_context.getUniversalQueueFamilyIndex();
2318 	const auto		queue		= m_context.getUniversalQueue();
2319 
2320 	const auto		imageFormat	= getOutputFormat();
2321 	const auto		tcuFormat	= mapVkFormat(imageFormat);
2322 	const auto		imageExtent	= makeExtent3D(m_params->width, m_params->height, 1u);
2323 	const auto		imageUsage	= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2324 
2325 	const auto&		binaries	= m_context.getBinaryCollection();
2326 	const auto		hasTask		= binaries.contains("task");
2327 	const auto		bufStages	= (hasTask ? VK_SHADER_STAGE_TASK_BIT_EXT : VK_SHADER_STAGE_MESH_BIT_EXT);
2328 
2329 	const VkImageCreateInfo colorBufferInfo =
2330 	{
2331 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
2332 		nullptr,								//	const void*				pNext;
2333 		0u,										//	VkImageCreateFlags		flags;
2334 		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
2335 		imageFormat,							//	VkFormat				format;
2336 		imageExtent,							//	VkExtent3D				extent;
2337 		1u,										//	uint32_t				mipLevels;
2338 		1u,										//	uint32_t				arrayLayers;
2339 		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
2340 		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
2341 		imageUsage,								//	VkImageUsageFlags		usage;
2342 		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
2343 		0u,										//	uint32_t				queueFamilyIndexCount;
2344 		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
2345 		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
2346 	};
2347 
2348 	// Create color image and view.
2349 	ImageWithMemory	colorImage	(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
2350 	const auto		colorSRR	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2351 	const auto		colorSRL	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
2352 	const auto		colorView	= makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
2353 
2354 	// Create a memory buffer for verification.
2355 	const auto			verificationBufferSize	= static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
2356 	const auto			verificationBufferUsage	= (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2357 	const auto			verificationBufferInfo	= makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
2358 
2359 	BufferWithMemory	verificationBuffer		(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
2360 	auto&				verificationBufferAlloc	= verificationBuffer.getAllocation();
2361 	void*				verificationBufferData	= verificationBufferAlloc.getHostPtr();
2362 
2363 	// This needs to match what the fragment shader will expect.
2364 	const PerVertexData perVertexData =
2365 	{
2366 		//	tcu::Vec4	positions[4];
2367 		{
2368 			tcu::Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
2369 			tcu::Vec4( 1.0f, -1.0f, 0.0f, 1.0f),
2370 			tcu::Vec4(-1.0f,  1.0f, 0.0f, 1.0f),
2371 			tcu::Vec4( 1.0f,  1.0f, 0.0f, 1.0f),
2372 		},
2373 		//	float		pointSizes[4];
2374 		{ 1.0f, 1.0f, 1.0f, 1.0f, },
2375 		//	float		clipDistances[4];
2376 		{
2377 			1.0f,
2378 			-1.0f,
2379 			1.0f,
2380 			-1.0f,
2381 		},
2382 		//	tcu::Vec4	custom1[4];
2383 		{
2384 			tcu::Vec4(0.25, 0.5, 10.0, 3.0),
2385 			tcu::Vec4(0.25, 1.0, 20.0, 3.0),
2386 			tcu::Vec4( 0.5, 0.5, 20.0, 3.0),
2387 			tcu::Vec4( 0.5, 1.0, 10.0, 3.0),
2388 		},
2389 		//	float		custom2[4];
2390 		{ 1.0f, 1.0f, 2.0f, 2.0f, },
2391 		//	int32_t		custom3[4];
2392 		{ 3, 3, 4, 4 },
2393 	};
2394 
2395 	// This needs to match what the fragment shader will expect. Reminder: some of these are declared as gvec4 to match the std140
2396 	// layout, but only the first component is actually used.
2397 	const PerPrimitiveData perPrimitiveData =
2398 	{
2399 		//	int			primitiveIds[2];
2400 		{
2401 			tcu::IVec4(1000, 0, 0, 0),
2402 			tcu::IVec4(1001, 0, 0, 0),
2403 		},
2404 		//	int			viewportIndices[2];
2405 		{
2406 			tcu::IVec4(1, 0, 0, 0),
2407 			tcu::IVec4(1, 0, 0, 0),
2408 		},
2409 		//	uvec4		custom4[2];
2410 		{
2411 			tcu::UVec4(100u, 101u, 102u, 103u),
2412 			tcu::UVec4(200u, 201u, 202u, 203u),
2413 		},
2414 		//	float		custom5[2];
2415 		{
2416 			tcu::Vec4(6.0f, 0.0f, 0.0f, 0.0f),
2417 			tcu::Vec4(7.0f, 0.0f, 0.0f, 0.0f),
2418 		},
2419 	};
2420 
2421 	// Create and fill buffers with this data.
2422 	const auto			pvdSize		= static_cast<VkDeviceSize>(sizeof(perVertexData));
2423 	const auto			pvdInfo		= makeBufferCreateInfo(pvdSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
2424 	BufferWithMemory	pvdData		(vkd, device, alloc, pvdInfo, MemoryRequirement::HostVisible);
2425 	auto&				pvdAlloc	= pvdData.getAllocation();
2426 	void*				pvdPtr		= pvdAlloc.getHostPtr();
2427 
2428 	const auto			ppdSize		= static_cast<VkDeviceSize>(sizeof(perPrimitiveData));
2429 	const auto			ppdInfo		= makeBufferCreateInfo(ppdSize, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
2430 	BufferWithMemory	ppdData		(vkd, device, alloc, ppdInfo, MemoryRequirement::HostVisible);
2431 	auto&				ppdAlloc	= ppdData.getAllocation();
2432 	void*				ppdPtr		= ppdAlloc.getHostPtr();
2433 
2434 	deMemcpy(pvdPtr, &perVertexData, sizeof(perVertexData));
2435 	deMemcpy(ppdPtr, &perPrimitiveData, sizeof(perPrimitiveData));
2436 
2437 	flushAlloc(vkd, device, pvdAlloc);
2438 	flushAlloc(vkd, device, ppdAlloc);
2439 
2440 	// Descriptor set layout.
2441 	DescriptorSetLayoutBuilder setLayoutBuilder;
2442 	setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, bufStages);
2443 	setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, bufStages);
2444 	const auto setLayout = setLayoutBuilder.build(vkd, device);
2445 
2446 	// Create and update descriptor set.
2447 	DescriptorPoolBuilder descriptorPoolBuilder;
2448 	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2449 	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
2450 	const auto descriptorPool	= descriptorPoolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2451 	const auto descriptorSet	= makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
2452 
2453 	DescriptorSetUpdateBuilder updateBuilder;
2454 	const auto storageBufferInfo = makeDescriptorBufferInfo(pvdData.get(), 0ull, pvdSize);
2455 	const auto uniformBufferInfo = makeDescriptorBufferInfo(ppdData.get(), 0ull, ppdSize);
2456 	updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &storageBufferInfo);
2457 	updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferInfo);
2458 	updateBuilder.update(vkd, device);
2459 
2460 	// Pipeline layout.
2461 	const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get());
2462 
2463 	// Shader modules.
2464 	const auto	meshShader	= createShaderModule(vkd, device, binaries.get("mesh"));
2465 	const auto	fragShader	= createShaderModule(vkd, device, binaries.get("frag"));
2466 
2467 	Move<VkShaderModule> taskShader;
2468 	if (hasTask)
2469 		taskShader = createShaderModule(vkd, device, binaries.get("task"));
2470 
2471 	// Render pass.
2472 	const auto renderPass = makeRenderPass(vkd, device, imageFormat);
2473 
2474 	// Framebuffer.
2475 	const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);
2476 
2477 	// Viewport and scissor.
2478 	const auto						topHalf		= makeViewport(imageExtent.width, imageExtent.height / 2u);
2479 	const std::vector<VkViewport>	viewports	{ makeViewport(imageExtent), topHalf };
2480 	const std::vector<VkRect2D>		scissors	(2u, makeRect2D(imageExtent));
2481 
2482 	const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
2483 		taskShader.get(), meshShader.get(), fragShader.get(),
2484 		renderPass.get(), viewports, scissors);
2485 
2486 	// Command pool and buffer.
2487 	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
2488 	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2489 	const auto cmdBuffer	= cmdBufferPtr.get();
2490 
2491 	beginCommandBuffer(vkd, cmdBuffer);
2492 
2493 	// Run pipeline.
2494 	const tcu::Vec4	clearColor	(0.0f, 0.0f, 0.0f, 0.0f);
2495 	const auto		drawCount	= m_params->drawCount();
2496 	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
2497 	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
2498 	vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
2499 	vkd.cmdDrawMeshTasksEXT(cmdBuffer, drawCount.x(), drawCount.y(), drawCount.z());
2500 	endRenderPass(vkd, cmdBuffer);
2501 
2502 	// Copy color buffer to verification buffer.
2503 	const auto colorAccess		= (VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT);
2504 	const auto transferRead		= VK_ACCESS_TRANSFER_READ_BIT;
2505 	const auto transferWrite	= VK_ACCESS_TRANSFER_WRITE_BIT;
2506 	const auto hostRead			= VK_ACCESS_HOST_READ_BIT;
2507 
2508 	const auto preCopyBarrier	= makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
2509 	const auto postCopyBarrier	= makeMemoryBarrier(transferWrite, hostRead);
2510 	const auto copyRegion		= makeBufferImageCopy(imageExtent, colorSRL);
2511 
2512 	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
2513 	vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, verificationBuffer.get(), 1u, &copyRegion);
2514 	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postCopyBarrier, 0u, nullptr, 0u, nullptr);
2515 
2516 	endCommandBuffer(vkd, cmdBuffer);
2517 	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
2518 
2519 	// Generate reference image and compare results.
2520 	const tcu::IVec3					iExtent				(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
2521 	const tcu::ConstPixelBufferAccess	verificationAccess	(tcuFormat, iExtent, verificationBufferData);
2522 
2523 	generateReferenceLevel();
2524 	invalidateAlloc(vkd, device, verificationBufferAlloc);
2525 	if (!verifyResult(verificationAccess))
2526 		TCU_FAIL("Result does not match reference; check log for details");
2527 
2528 	return tcu::TestStatus::pass("Pass");
2529 }
2530 
2531 // Tests that use push constants in the new stages.
2532 class PushConstantCase : public MeshShaderMiscCase
2533 {
2534 public:
PushConstantCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)2535 					PushConstantCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
2536 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
2537 					{}
2538 
2539 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
2540 	TestInstance*	createInstance			(Context& context) const override;
2541 };
2542 
2543 class PushConstantInstance : public MeshShaderMiscInstance
2544 {
2545 public:
PushConstantInstance(Context & context,const MiscTestParams * params)2546 	PushConstantInstance (Context& context, const MiscTestParams* params)
2547 		: MeshShaderMiscInstance (context, params)
2548 	{}
2549 
2550 	void			generateReferenceLevel	() override;
2551 	tcu::TestStatus	iterate					() override;
2552 };
2553 
createInstance(Context & context) const2554 TestInstance* PushConstantCase::createInstance (Context& context) const
2555 {
2556 	return new PushConstantInstance(context, m_params.get());
2557 }
2558 
generateReferenceLevel()2559 void PushConstantInstance::generateReferenceLevel ()
2560 {
2561 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
2562 }
2563 
initPrograms(vk::SourceCollections & programCollection) const2564 void PushConstantCase::initPrograms (vk::SourceCollections& programCollection) const
2565 {
2566 	const auto buildOptions		= getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
2567 	const auto useTaskShader	= m_params->needsTaskShader();
2568 	const auto pcNumFloats		= (useTaskShader ? 2u : 4u);
2569 
2570 	std::ostringstream pushConstantStream;
2571 	pushConstantStream
2572 		<< "layout (push_constant, std430) uniform PushConstantBlock {\n"
2573 		<< "    layout (offset=${PCOFFSET}) float values[" << pcNumFloats << "];\n"
2574 		<< "} pc;\n"
2575 		<< "\n"
2576 		;
2577 	const tcu::StringTemplate pushConstantsTemplate (pushConstantStream.str());
2578 	using TemplateMap = std::map<std::string, std::string>;
2579 
2580 	std::ostringstream taskDataStream;
2581 	taskDataStream
2582 		<< "struct TaskData {\n"
2583 		<< "    float values[2];\n"
2584 		<< "};\n"
2585 		<< "taskPayloadSharedEXT TaskData td;\n"
2586 		<< "\n"
2587 		;
2588 	const auto taskDataDecl = taskDataStream.str();
2589 
2590 	if (useTaskShader)
2591 	{
2592 		TemplateMap taskMap;
2593 		taskMap["PCOFFSET"] = std::to_string(2u * sizeof(float));
2594 
2595 		const auto& meshCount = m_params->meshCount;
2596 		std::ostringstream task;
2597 		task
2598 			<< "#version 450\n"
2599 			<< "#extension GL_EXT_mesh_shader : enable\n"
2600 			<< "\n"
2601 			<< "layout(local_size_x=1) in;\n"
2602 			<< "\n"
2603 			<< taskDataDecl
2604 			<< pushConstantsTemplate.specialize(taskMap)
2605 			<< "void main ()\n"
2606 			<< "{\n"
2607 			<< "    td.values[0] = pc.values[0];\n"
2608 			<< "    td.values[1] = pc.values[1];\n"
2609 			<< "\n"
2610 			<< "    EmitMeshTasksEXT(" << meshCount.x() << ", " << meshCount.y() << ", " << meshCount.z() << ");\n"
2611 			<< "}\n"
2612 			;
2613 		programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
2614 	}
2615 
2616 	{
2617 		const std::string blue	= (useTaskShader ? "td.values[0] + pc.values[0]" : "pc.values[0] + pc.values[2]");
2618 		const std::string alpha	= (useTaskShader ? "td.values[1] + pc.values[1]" : "pc.values[1] + pc.values[3]");
2619 
2620 		TemplateMap meshMap;
2621 		meshMap["PCOFFSET"] = "0";
2622 
2623 		std::ostringstream mesh;
2624 		mesh
2625 			<< "#version 450\n"
2626 			<< "#extension GL_EXT_mesh_shader : enable\n"
2627 			<< "\n"
2628 			<< "layout(local_size_x=1) in;\n"
2629 			<< "layout(triangles) out;\n"
2630 			<< "layout(max_vertices=3, max_primitives=1) out;\n"
2631 			<< "\n"
2632 			<< "layout (location=0) out perprimitiveEXT vec4 triangleColor[];\n"
2633 			<< "\n"
2634 			<< pushConstantsTemplate.specialize(meshMap)
2635 			<< (useTaskShader ? taskDataDecl : "")
2636 			<< "void main ()\n"
2637 			<< "{\n"
2638 			<< "    SetMeshOutputsEXT(3u, 1u);\n"
2639 			<< "\n"
2640 			<< "    gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
2641 			<< "    gl_MeshVerticesEXT[1].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
2642 			<< "    gl_MeshVerticesEXT[2].gl_Position = vec4(-1.0,  3.0, 0.0, 1.0);\n"
2643 			<< "\n"
2644 			<< "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n"
2645 			<< "    triangleColor[0] = vec4(0.0, 0.0, " << blue << ", " << alpha << ");\n"
2646 			<< "}\n"
2647 			;
2648 		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
2649 	}
2650 
2651 	// Add default fragment shader.
2652 	MeshShaderMiscCase::initPrograms(programCollection);
2653 }
2654 
iterate()2655 tcu::TestStatus PushConstantInstance::iterate ()
2656 {
2657 	const auto&		vkd			= m_context.getDeviceInterface();
2658 	const auto		device		= m_context.getDevice();
2659 	auto&			alloc		= m_context.getDefaultAllocator();
2660 	const auto		queueIndex	= m_context.getUniversalQueueFamilyIndex();
2661 	const auto		queue		= m_context.getUniversalQueue();
2662 
2663 	const auto		imageFormat	= getOutputFormat();
2664 	const auto		tcuFormat	= mapVkFormat(imageFormat);
2665 	const auto		imageExtent	= makeExtent3D(m_params->width, m_params->height, 1u);
2666 	const auto		imageUsage	= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2667 
2668 	const auto&		binaries	= m_context.getBinaryCollection();
2669 	const auto		hasTask		= binaries.contains("task");
2670 
2671 	const VkImageCreateInfo colorBufferInfo =
2672 	{
2673 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
2674 		nullptr,								//	const void*				pNext;
2675 		0u,										//	VkImageCreateFlags		flags;
2676 		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
2677 		imageFormat,							//	VkFormat				format;
2678 		imageExtent,							//	VkExtent3D				extent;
2679 		1u,										//	uint32_t				mipLevels;
2680 		1u,										//	uint32_t				arrayLayers;
2681 		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
2682 		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
2683 		imageUsage,								//	VkImageUsageFlags		usage;
2684 		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
2685 		0u,										//	uint32_t				queueFamilyIndexCount;
2686 		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
2687 		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
2688 	};
2689 
2690 	// Create color image and view.
2691 	ImageWithMemory	colorImage	(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
2692 	const auto		colorSRR	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2693 	const auto		colorSRL	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
2694 	const auto		colorView	= makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
2695 
2696 	// Create a memory buffer for verification.
2697 	const auto			verificationBufferSize	= static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
2698 	const auto			verificationBufferUsage	= (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2699 	const auto			verificationBufferInfo	= makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
2700 
2701 	BufferWithMemory	verificationBuffer		(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
2702 	auto&				verificationBufferAlloc	= verificationBuffer.getAllocation();
2703 	void*				verificationBufferData	= verificationBufferAlloc.getHostPtr();
2704 
2705 	// Push constant ranges.
2706 	std::vector<float> pcData { 0.25f, 0.25f, 0.75f, 0.75f };
2707 	const auto pcSize		= static_cast<uint32_t>(de::dataSize(pcData));
2708 	const auto pcHalfSize	= pcSize / 2u;
2709 
2710 	std::vector<VkPushConstantRange> pcRanges;
2711 	if (hasTask)
2712 	{
2713 		pcRanges.push_back(makePushConstantRange(VK_SHADER_STAGE_MESH_BIT_EXT, 0u, pcHalfSize));
2714 		pcRanges.push_back(makePushConstantRange(VK_SHADER_STAGE_TASK_BIT_EXT, pcHalfSize, pcHalfSize));
2715 	}
2716 	else
2717 	{
2718 		pcRanges.push_back(makePushConstantRange(VK_SHADER_STAGE_MESH_BIT_EXT, 0u, pcSize));
2719 	}
2720 
2721 	// Pipeline layout.
2722 	const auto pipelineLayout = makePipelineLayout(vkd, device, 0u, nullptr, static_cast<uint32_t>(pcRanges.size()), de::dataOrNull(pcRanges));
2723 
2724 	// Shader modules.
2725 	const auto	meshShader	= createShaderModule(vkd, device, binaries.get("mesh"));
2726 	const auto	fragShader	= createShaderModule(vkd, device, binaries.get("frag"));
2727 
2728 	Move<VkShaderModule> taskShader;
2729 	if (hasTask)
2730 		taskShader = createShaderModule(vkd, device, binaries.get("task"));
2731 
2732 	// Render pass.
2733 	const auto renderPass = makeRenderPass(vkd, device, imageFormat);
2734 
2735 	// Framebuffer.
2736 	const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);
2737 
2738 	// Viewport and scissor.
2739 	const std::vector<VkViewport>	viewports	(1u, makeViewport(imageExtent));
2740 	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(imageExtent));
2741 
2742 	const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
2743 		taskShader.get(), meshShader.get(), fragShader.get(),
2744 		renderPass.get(), viewports, scissors);
2745 
2746 	// Command pool and buffer.
2747 	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
2748 	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2749 	const auto cmdBuffer	= cmdBufferPtr.get();
2750 
2751 	beginCommandBuffer(vkd, cmdBuffer);
2752 
2753 	// Run pipeline.
2754 	const tcu::Vec4	clearColor	(0.0f, 0.0f, 0.0f, 0.0f);
2755 	const auto		drawCount	= m_params->drawCount();
2756 	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
2757 	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
2758 	for (const auto& range : pcRanges)
2759 		vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), range.stageFlags, range.offset, range.size, reinterpret_cast<const char*>(pcData.data()) + range.offset);
2760 	vkd.cmdDrawMeshTasksEXT(cmdBuffer, drawCount.x(), drawCount.y(), drawCount.z());
2761 	endRenderPass(vkd, cmdBuffer);
2762 
2763 	// Copy color buffer to verification buffer.
2764 	const auto colorAccess		= (VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT);
2765 	const auto transferRead		= VK_ACCESS_TRANSFER_READ_BIT;
2766 	const auto transferWrite	= VK_ACCESS_TRANSFER_WRITE_BIT;
2767 	const auto hostRead			= VK_ACCESS_HOST_READ_BIT;
2768 
2769 	const auto preCopyBarrier	= makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
2770 	const auto postCopyBarrier	= makeMemoryBarrier(transferWrite, hostRead);
2771 	const auto copyRegion		= makeBufferImageCopy(imageExtent, colorSRL);
2772 
2773 	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
2774 	vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, verificationBuffer.get(), 1u, &copyRegion);
2775 	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postCopyBarrier, 0u, nullptr, 0u, nullptr);
2776 
2777 	endCommandBuffer(vkd, cmdBuffer);
2778 	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
2779 
2780 	// Generate reference image and compare results.
2781 	const tcu::IVec3					iExtent				(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
2782 	const tcu::ConstPixelBufferAccess	verificationAccess	(tcuFormat, iExtent, verificationBufferData);
2783 
2784 	generateReferenceLevel();
2785 	invalidateAlloc(vkd, device, verificationBufferAlloc);
2786 	if (!verifyResult(verificationAccess))
2787 		TCU_FAIL("Result does not match reference; check log for details");
2788 
2789 	return tcu::TestStatus::pass("Pass");
2790 }
2791 
2792 // Use large work group size, large number of vertices and large number of primitives.
2793 struct MaximizeThreadsParams : public MiscTestParams
2794 {
MaximizeThreadsParamsvkt::MeshShader::__anonb99b7fd80111::MaximizeThreadsParams2795 	MaximizeThreadsParams	(const tcu::Maybe<tcu::UVec3>& taskCount_, const tcu::UVec3& meshCount_, uint32_t width_, uint32_t height_,
2796 							 uint32_t localSize_, uint32_t numVertices_, uint32_t numPrimitives_)
2797 		: MiscTestParams	(taskCount_, meshCount_, width_, height_)
2798 		, localSize			(localSize_)
2799 		, numVertices		(numVertices_)
2800 		, numPrimitives		(numPrimitives_)
2801 		{}
2802 
2803 	uint32_t localSize;
2804 	uint32_t numVertices;
2805 	uint32_t numPrimitives;
2806 
checkSupportvkt::MeshShader::__anonb99b7fd80111::MaximizeThreadsParams2807 	void checkSupport (Context& context) const
2808 	{
2809 		const auto& properties = context.getMeshShaderPropertiesEXT();
2810 
2811 		if (localSize > properties.maxMeshWorkGroupSize[0])
2812 			TCU_THROW(NotSupportedError, "Required local size not supported");
2813 
2814 		if (numVertices > properties.maxMeshOutputVertices)
2815 			TCU_THROW(NotSupportedError, "Required number of output vertices not supported");
2816 
2817 		if (numPrimitives > properties.maxMeshOutputPrimitives)
2818 			TCU_THROW(NotSupportedError, "Required number of output primitives not supported");
2819 	}
2820 };
2821 
2822 // Focus on the number of primitives.
2823 class MaximizePrimitivesCase : public MeshShaderMiscCase
2824 {
2825 public:
MaximizePrimitivesCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)2826 					MaximizePrimitivesCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
2827 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
2828 					{
2829 						const auto mtParams = dynamic_cast<MaximizeThreadsParams*>(m_params.get());
2830 						DE_ASSERT(mtParams);
2831 						DE_UNREF(mtParams); // For release builds.
2832 					}
2833 
2834 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
2835 	void			checkSupport			(Context& context) const override;
2836 	TestInstance*	createInstance			(Context& context) const override;
2837 };
2838 
2839 class MaximizePrimitivesInstance : public MeshShaderMiscInstance
2840 {
2841 public:
MaximizePrimitivesInstance(Context & context,const MiscTestParams * params)2842 	MaximizePrimitivesInstance (Context& context, const MiscTestParams* params)
2843 		: MeshShaderMiscInstance (context, params)
2844 	{}
2845 
2846 	void	generateReferenceLevel	() override;
2847 };
2848 
createInstance(Context & context) const2849 TestInstance* MaximizePrimitivesCase::createInstance (Context& context) const
2850 {
2851 	return new MaximizePrimitivesInstance (context, m_params.get());
2852 }
2853 
checkSupport(Context & context) const2854 void MaximizePrimitivesCase::checkSupport (Context& context) const
2855 {
2856 	MeshShaderMiscCase::checkSupport(context);
2857 
2858 	const auto params = dynamic_cast<MaximizeThreadsParams*>(m_params.get());
2859 	params->checkSupport(context);
2860 }
2861 
initPrograms(vk::SourceCollections & programCollection) const2862 void MaximizePrimitivesCase::initPrograms (vk::SourceCollections& programCollection) const
2863 {
2864 	const auto buildOptions	= getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
2865 	const auto params		= dynamic_cast<MaximizeThreadsParams*>(m_params.get());
2866 
2867 	DE_ASSERT(!params->needsTaskShader());
2868 	MeshShaderMiscCase::initPrograms(programCollection);
2869 
2870 	// Idea behind the test: generate 128 vertices, 1 per each pixel in a 128x1 image. Then, use each vertex to generate two points,
2871 	// adding the colors of each point using color blending to make sure every point is properly generated.
2872 
2873 	DE_ASSERT(params->numPrimitives == params->numVertices * 2u);
2874 	DE_ASSERT(params->numVertices == params->width);
2875 
2876 	const auto verticesPerInvocation	= params->numVertices / params->localSize;
2877 	const auto primitivesPerVertex		= params->numPrimitives / params->numVertices;
2878 
2879 	std::ostringstream mesh;
2880 	mesh
2881 		<< "#version 450\n"
2882 		<< "#extension GL_EXT_mesh_shader : enable\n"
2883 		<< "\n"
2884 		<< "layout(local_size_x=" << params->localSize << ") in;\n"
2885 		<< "layout(points) out;\n"
2886 		<< "layout(max_vertices=" << params->numVertices << ", max_primitives=" << params->numPrimitives << ") out;\n"
2887 		<< "\n"
2888 		<< "layout (location=0) out perprimitiveEXT vec4 pointColor[];\n"
2889 		<< "\n"
2890 		<< "const uint verticesPerInvocation = " << verticesPerInvocation << ";\n"
2891 		<< "const uint primitivesPerVertex   = " << primitivesPerVertex << ";\n"
2892 		<< "\n"
2893 		<< "vec4 colors[primitivesPerVertex] = vec4[](\n"
2894 		<< "    vec4(0.0, 0.0, 1.0, 1.0),\n"
2895 		<< "    vec4(1.0, 0.0, 0.0, 1.0)\n"
2896 		<< ");\n"
2897 		<< "void main ()\n"
2898 		<< "{\n"
2899 		<< "    SetMeshOutputsEXT(" << params->numVertices << ", " << params->numPrimitives << ");\n"
2900 		<< "    const uint firstVertex = gl_LocalInvocationIndex * verticesPerInvocation;\n"
2901 		<< "    for (uint i = 0u; i < verticesPerInvocation; ++i)\n"
2902 		<< "    {\n"
2903 		<< "        const uint vertexNumber = firstVertex + i;\n"
2904 		<< "        const float xCoord = ((float(vertexNumber) + 0.5) / " << params->width << ".0) * 2.0 - 1.0;\n"
2905 		<< "        const float yCoord = 0.0;\n"
2906 		<< "        gl_MeshVerticesEXT[vertexNumber].gl_Position = vec4(xCoord, yCoord, 0.0f, 1.0f);\n"
2907 		<< "        gl_MeshVerticesEXT[vertexNumber].gl_PointSize = 1.0f;\n"
2908 		<< "        for (uint j = 0u; j < primitivesPerVertex; ++j)\n"
2909 		<< "        {\n"
2910 		<< "            const uint primitiveNumber = vertexNumber * primitivesPerVertex + j;\n"
2911 		<< "            gl_PrimitivePointIndicesEXT[primitiveNumber] = vertexNumber;\n"
2912 		<< "            pointColor[primitiveNumber] = colors[j];\n"
2913 		<< "        }\n"
2914 		<< "    }\n"
2915 		<< "}\n"
2916 		;
2917 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
2918 }
2919 
generateReferenceLevel()2920 void MaximizePrimitivesInstance::generateReferenceLevel ()
2921 {
2922 	generateSolidRefLevel(tcu::Vec4(1.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
2923 }
2924 
2925 // Focus on the number of vertices.
2926 class MaximizeVerticesCase : public MeshShaderMiscCase
2927 {
2928 public:
MaximizeVerticesCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)2929 					MaximizeVerticesCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
2930 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
2931 					{
2932 						const auto mtParams = dynamic_cast<MaximizeThreadsParams*>(m_params.get());
2933 						DE_ASSERT(mtParams);
2934 						DE_UNREF(mtParams); // For release builds.
2935 					}
2936 
2937 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
2938 	void			checkSupport			(Context& context) const override;
2939 	TestInstance*	createInstance			(Context& context) const override;
2940 };
2941 
2942 class MaximizeVerticesInstance : public MeshShaderMiscInstance
2943 {
2944 public:
MaximizeVerticesInstance(Context & context,const MiscTestParams * params)2945 	MaximizeVerticesInstance (Context& context, const MiscTestParams* params)
2946 		: MeshShaderMiscInstance (context, params)
2947 	{}
2948 
2949 	void	generateReferenceLevel	() override;
2950 };
2951 
createInstance(Context & context) const2952 TestInstance* MaximizeVerticesCase::createInstance (Context& context) const
2953 {
2954 	return new MaximizeVerticesInstance (context, m_params.get());
2955 }
2956 
checkSupport(Context & context) const2957 void MaximizeVerticesCase::checkSupport (Context& context) const
2958 {
2959 	MeshShaderMiscCase::checkSupport(context);
2960 
2961 	const auto params = dynamic_cast<MaximizeThreadsParams*>(m_params.get());
2962 	params->checkSupport(context);
2963 }
2964 
initPrograms(vk::SourceCollections & programCollection) const2965 void MaximizeVerticesCase::initPrograms (vk::SourceCollections& programCollection) const
2966 {
2967 	const auto buildOptions	= getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
2968 	const auto params		= dynamic_cast<MaximizeThreadsParams*>(m_params.get());
2969 
2970 	DE_ASSERT(!params->needsTaskShader());
2971 	MeshShaderMiscCase::initPrograms(programCollection);
2972 
2973 	// Idea behind the test: cover a framebuffer using a triangle quad per pixel (4 vertices, 2 triangles).
2974 	DE_ASSERT(params->numVertices == params->numPrimitives * 2u);
2975 	DE_ASSERT(params->numPrimitives == params->width * 2u);
2976 
2977 	const auto pixelsPerInvocation		= params->width / params->localSize;
2978 	const auto verticesPerPixel			= 4u;
2979 	const auto primitivesPerPixel		= 2u;
2980 	const auto verticesPerInvocation	= pixelsPerInvocation * verticesPerPixel;
2981 	const auto primitivesPerInvocation	= pixelsPerInvocation * primitivesPerPixel;
2982 
2983 	std::ostringstream mesh;
2984 	mesh
2985 		<< "#version 450\n"
2986 		<< "#extension GL_EXT_mesh_shader : enable\n"
2987 		<< "\n"
2988 		<< "layout(local_size_x=" << params->localSize << ") in;\n"
2989 		<< "layout(triangles) out;\n"
2990 		<< "layout(max_vertices=" << params->numVertices << ", max_primitives=" << params->numPrimitives << ") out;\n"
2991 		<< "\n"
2992 		<< "layout (location=0) out perprimitiveEXT vec4 triangleColor[];\n"
2993 		<< "\n"
2994 		<< "const uint pixelsPerInvocation     = " << pixelsPerInvocation << ";\n"
2995 		<< "const uint verticesPerInvocation   = " << verticesPerInvocation << ";\n"
2996 		<< "const uint primitivesPerInvocation = " << primitivesPerInvocation << ";\n"
2997 		<< "const uint indicesPerInvocation    = primitivesPerInvocation * 3u;\n"
2998 		<< "const uint verticesPerPixel        = " << verticesPerPixel << ";\n"
2999 		<< "const uint primitivesPerPixel      = " << primitivesPerPixel << ";\n"
3000 		<< "const uint indicesPerPixel         = primitivesPerPixel * 3u;\n"
3001 		<< "\n"
3002 		<< "void main ()\n"
3003 		<< "{\n"
3004 		<< "    SetMeshOutputsEXT(" << params->numVertices << ", " << params->numPrimitives << ");\n"
3005 		<< "\n"
3006 		<< "    const uint firstPixel    = gl_LocalInvocationIndex * pixelsPerInvocation;\n"
3007 		<< "    const float pixelWidth   = 2.0 / float(" << params->width << ");\n"
3008 		<< "    const float quarterWidth = pixelWidth / 4.0;\n"
3009 		<< "\n"
3010 		<< "    for (uint pixelIdx = 0u; pixelIdx < pixelsPerInvocation; ++pixelIdx)\n"
3011 		<< "    {\n"
3012 		<< "        const uint pixelId      = firstPixel + pixelIdx;\n"
3013 		<< "        const float pixelCenter = (float(pixelId) + 0.5) / float(" << params->width << ") * 2.0 - 1.0;\n"
3014 		<< "        const float left        = pixelCenter - quarterWidth;\n"
3015 		<< "        const float right       = pixelCenter + quarterWidth;\n"
3016 		<< "\n"
3017 		<< "        const uint firstVertex = gl_LocalInvocationIndex * verticesPerInvocation + pixelIdx * verticesPerPixel;\n"
3018 		<< "        gl_MeshVerticesEXT[firstVertex + 0].gl_Position = vec4(left,  -1.0, 0.0f, 1.0f);\n"
3019 		<< "        gl_MeshVerticesEXT[firstVertex + 1].gl_Position = vec4(left,   1.0, 0.0f, 1.0f);\n"
3020 		<< "        gl_MeshVerticesEXT[firstVertex + 2].gl_Position = vec4(right, -1.0, 0.0f, 1.0f);\n"
3021 		<< "        gl_MeshVerticesEXT[firstVertex + 3].gl_Position = vec4(right,  1.0, 0.0f, 1.0f);\n"
3022 		<< "\n"
3023 		<< "        const uint firstPrimitive = gl_LocalInvocationIndex * primitivesPerInvocation + pixelIdx * primitivesPerPixel;\n"
3024 		<< "        triangleColor[firstPrimitive + 0] = vec4(0.0, 0.0, 1.0, 1.0);\n"
3025 		<< "        triangleColor[firstPrimitive + 1] = vec4(0.0, 0.0, 1.0, 1.0);\n"
3026 		<< "\n"
3027 		<< "        const uint firstIndex = gl_LocalInvocationIndex * indicesPerInvocation + pixelIdx * indicesPerPixel;\n"
3028 		<< "        gl_PrimitiveTriangleIndicesEXT[firstPrimitive + 0] = uvec3(firstVertex + 0, firstVertex + 1, firstVertex + 2);\n"
3029 		<< "        gl_PrimitiveTriangleIndicesEXT[firstPrimitive + 1] = uvec3(firstVertex + 1, firstVertex + 3, firstVertex + 2);\n"
3030 		<< "    }\n"
3031 		<< "}\n"
3032 		;
3033 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
3034 }
3035 
generateReferenceLevel()3036 void MaximizeVerticesInstance::generateReferenceLevel ()
3037 {
3038 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
3039 }
3040 
3041 // Focus on the number of invocations.
3042 class MaximizeInvocationsCase : public MeshShaderMiscCase
3043 {
3044 public:
MaximizeInvocationsCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)3045 					MaximizeInvocationsCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
3046 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
3047 					{
3048 						const auto mtParams = dynamic_cast<MaximizeThreadsParams*>(m_params.get());
3049 						DE_ASSERT(mtParams);
3050 						DE_UNREF(mtParams); // For release builds.
3051 					}
3052 
3053 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
3054 	void			checkSupport			(Context& context) const override;
3055 	TestInstance*	createInstance			(Context& context) const override;
3056 };
3057 
3058 class MaximizeInvocationsInstance : public MeshShaderMiscInstance
3059 {
3060 public:
MaximizeInvocationsInstance(Context & context,const MiscTestParams * params)3061 	MaximizeInvocationsInstance (Context& context, const MiscTestParams* params)
3062 		: MeshShaderMiscInstance (context, params)
3063 	{}
3064 
3065 	void	generateReferenceLevel	() override;
3066 };
3067 
createInstance(Context & context) const3068 TestInstance* MaximizeInvocationsCase::createInstance (Context& context) const
3069 {
3070 	return new MaximizeInvocationsInstance (context, m_params.get());
3071 }
3072 
checkSupport(Context & context) const3073 void MaximizeInvocationsCase::checkSupport (Context& context) const
3074 {
3075 	MeshShaderMiscCase::checkSupport(context);
3076 
3077 	const auto params = dynamic_cast<MaximizeThreadsParams*>(m_params.get());
3078 	params->checkSupport(context);
3079 }
3080 
initPrograms(vk::SourceCollections & programCollection) const3081 void MaximizeInvocationsCase::initPrograms (vk::SourceCollections& programCollection) const
3082 {
3083 	const auto buildOptions	= getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
3084 	const auto params		= dynamic_cast<MaximizeThreadsParams*>(m_params.get());
3085 
3086 	DE_ASSERT(!params->needsTaskShader());
3087 	MeshShaderMiscCase::initPrograms(programCollection);
3088 
3089 	// Idea behind the test: use two invocations to generate one point per framebuffer pixel.
3090 	DE_ASSERT(params->localSize == params->width * 2u);
3091 	DE_ASSERT(params->localSize == params->numPrimitives * 2u);
3092 	DE_ASSERT(params->localSize == params->numVertices * 2u);
3093 
3094 	std::ostringstream mesh;
3095 	mesh
3096 		<< "#version 450\n"
3097 		<< "#extension GL_EXT_mesh_shader : enable\n"
3098 		<< "\n"
3099 		<< "layout(local_size_x=" << params->localSize << ") in;\n"
3100 		<< "layout(points) out;\n"
3101 		<< "layout(max_vertices=" << params->numVertices << ", max_primitives=" << params->numPrimitives << ") out;\n"
3102 		<< "\n"
3103 		<< "layout (location=0) out perprimitiveEXT vec4 pointColor[];\n"
3104 		<< "\n"
3105 		<< "void main ()\n"
3106 		<< "{\n"
3107 		<< "    SetMeshOutputsEXT(" << params->numVertices << ", " << params->numPrimitives << ");\n"
3108 		<< "    const uint pixelId = gl_LocalInvocationIndex / 2u;\n"
3109 		<< "    if (gl_LocalInvocationIndex % 2u == 0u)\n"
3110 		<< "    {\n"
3111 		<< "        const float xCoord = (float(pixelId) + 0.5) / float(" << params->width << ") * 2.0 - 1.0;\n"
3112 		<< "        gl_MeshVerticesEXT[pixelId].gl_Position = vec4(xCoord, 0.0, 0.0f, 1.0f);\n"
3113 		<< "        gl_MeshVerticesEXT[pixelId].gl_PointSize = 1.0f;\n"
3114 		<< "    }\n"
3115 		<< "    else\n"
3116 		<< "    {\n"
3117 		<< "        gl_PrimitivePointIndicesEXT[pixelId] = pixelId;\n"
3118 		<< "        pointColor[pixelId] = vec4(0.0, 0.0, 1.0, 1.0);\n"
3119 		<< "    }\n"
3120 		<< "}\n"
3121 		;
3122 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
3123 }
3124 
generateReferenceLevel()3125 void MaximizeInvocationsInstance::generateReferenceLevel ()
3126 {
3127 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
3128 }
3129 
3130 // Verify mixing classic and mesh shading pipelines in the same render pass.
3131 struct MixedPipelinesParams : public MiscTestParams
3132 {
3133 public:
3134 	bool dynamicTopology;
3135 
MixedPipelinesParamsvkt::MeshShader::__anonb99b7fd80111::MixedPipelinesParams3136 	MixedPipelinesParams (const tcu::Maybe<tcu::UVec3>& taskCount_, const tcu::UVec3& meshCount_, uint32_t width_, uint32_t height_, bool dynamicTopology_)
3137 		: MiscTestParams	(taskCount_, meshCount_, width_, height_)
3138 		, dynamicTopology	(dynamicTopology_)
3139 	{}
3140 };
3141 
3142 // Global idea behind this case: draw 4 times with classic, mesh, classic and mesh pipelines. Each draw will use a full screen quad
3143 // and a dynamic scissor to restrict drawing in the framebuffer to one specific quadrant of the color attachment. The color of each
3144 // quadrant will be taken from a push constant that changes between steps, so each quadrant ends up with a different color.
3145 class MixedPipelinesCase : public MeshShaderMiscCase
3146 {
3147 public:
MixedPipelinesCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)3148 					MixedPipelinesCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
3149 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
3150 					{}
3151 
3152 	void			checkSupport			(Context& context) const override;
3153 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
3154 	TestInstance*	createInstance			(Context& context) const override;
3155 };
3156 
3157 class MixedPipelinesInstance : public MeshShaderMiscInstance
3158 {
3159 public:
MixedPipelinesInstance(Context & context,const MiscTestParams * params)3160 	MixedPipelinesInstance (Context& context, const MiscTestParams* params)
3161 		: MeshShaderMiscInstance (context, params)
3162 	{}
3163 
3164 	typedef std::pair<VkRect2D, tcu::Vec4>	RectColor;
3165 	typedef std::vector<RectColor>			RectColorVec;
3166 	RectColorVec	getQuadrantColors		();
3167 	tcu::Vec4		getClearColor			();
3168 
3169 	void			generateReferenceLevel	() override;
3170 	tcu::TestStatus	iterate					() override;
3171 
3172 };
3173 
createInstance(Context & context) const3174 TestInstance* MixedPipelinesCase::createInstance (Context& context) const
3175 {
3176 	return new MixedPipelinesInstance (context, m_params.get());
3177 }
3178 
checkSupport(Context & context) const3179 void MixedPipelinesCase::checkSupport (Context& context) const
3180 {
3181 	const auto params = dynamic_cast<MixedPipelinesParams*>(m_params.get());
3182 	DE_ASSERT(params);
3183 
3184 	MeshShaderMiscCase::checkSupport(context);
3185 
3186 	if (params->dynamicTopology)
3187 		context.requireDeviceFunctionality("VK_EXT_extended_dynamic_state");
3188 }
3189 
initPrograms(vk::SourceCollections & programCollection) const3190 void MixedPipelinesCase::initPrograms (vk::SourceCollections& programCollection) const
3191 {
3192 	const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
3193 
3194 	DE_ASSERT(!m_params->needsTaskShader());
3195 
3196 	// The fragment shader will draw using the color indicated by the push constant.
3197 	const std::string frag =
3198 		"#version 450\n"
3199 		"\n"
3200 		"layout (location=0) out vec4 outColor;\n"
3201 		"layout (push_constant, std430) uniform PushConstantBlock {\n"
3202 		"    vec4 color;\n"
3203 		"} pc;\n"
3204 		"\n"
3205 		"void main ()\n"
3206 		"{\n"
3207 		"    outColor = pc.color;\n"
3208 		"}\n"
3209 		;
3210 	programCollection.glslSources.add("frag") << glu::FragmentSource(frag);
3211 
3212 	const std::string vert =
3213 		"#version 450\n"
3214 		"\n"
3215 		"void main()\n"
3216 		"{\n"
3217 		// Full-screen clockwise triangle strip with 4 vertices.
3218 		"    const float x = (-1.0+2.0*((gl_VertexIndex & 2)>>1));\n"
3219 		"    const float y = ( 1.0-2.0* (gl_VertexIndex % 2));\n"
3220 		"    gl_Position = vec4(x, y, 0.0, 1.0);\n"
3221 		"}\n"
3222 		;
3223 	programCollection.glslSources.add("vert") << glu::VertexSource(vert);
3224 
3225 	const std::string mesh =
3226 		"#version 450\n"
3227 		"#extension GL_EXT_mesh_shader : enable\n"
3228 		"\n"
3229 		"layout(local_size_x=4) in;\n"
3230 		"layout(triangles) out;\n"
3231 		"layout(max_vertices=4, max_primitives=2) out;\n"
3232 		"\n"
3233 		"void main ()\n"
3234 		"{\n"
3235 		"    SetMeshOutputsEXT(4u, 2u);\n"
3236 		// Full-screen clockwise triangle strip with 4 vertices.
3237 		"    const float x = (-1.0+2.0*((gl_LocalInvocationIndex & 2)>>1));\n"
3238 		"    const float y = ( 1.0-2.0*((gl_LocalInvocationIndex & 1)   ));\n"
3239 		"    gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(x, y, 0.0, 1.0);\n"
3240 		"    if (gl_LocalInvocationIndex == 0u) {\n"
3241 		"        gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0u, 1u, 2u);\n"
3242 		"        gl_PrimitiveTriangleIndicesEXT[1] = uvec3(2u, 1u, 3u);\n"
3243 		"    }\n"
3244 		"}\n"
3245 		;
3246 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh) << buildOptions;
3247 }
3248 
getQuadrantColors()3249 MixedPipelinesInstance::RectColorVec MixedPipelinesInstance::getQuadrantColors ()
3250 {
3251 	const auto width		= m_params->width;
3252 	const auto height		= m_params->height;
3253 	const auto halfWidth	= width / 2u;
3254 	const auto halfHeight	= height / 2u;
3255 	const auto iHalfWidth	= static_cast<int>(halfWidth);
3256 	const auto iHalfHeight	= static_cast<int>(halfHeight);
3257 
3258 	DE_ASSERT(width % 2u == 0u);
3259 	DE_ASSERT(height % 2u == 0u);
3260 
3261 	// Associate a different color to each rectangle.
3262 	const RectColorVec quadrantColors {
3263 		std::make_pair(makeRect2D(0,          0,           halfWidth, halfHeight), tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f)),
3264 		std::make_pair(makeRect2D(0,          iHalfHeight, halfWidth, halfHeight), tcu::Vec4(0.0f, 1.0f, 1.0f, 1.0f)),
3265 		std::make_pair(makeRect2D(iHalfWidth, 0,           halfWidth, halfHeight), tcu::Vec4(1.0f, 0.0f, 1.0f, 1.0f)),
3266 		std::make_pair(makeRect2D(iHalfWidth, iHalfHeight, halfWidth, halfHeight), tcu::Vec4(1.0f, 1.0f, 1.0f, 1.0f)),
3267 	};
3268 	return quadrantColors;
3269 }
3270 
getClearColor()3271 tcu::Vec4 MixedPipelinesInstance::getClearColor ()
3272 {
3273 	return tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f);
3274 }
3275 
generateReferenceLevel()3276 void MixedPipelinesInstance::generateReferenceLevel ()
3277 {
3278 	const auto format		= getOutputFormat();
3279 	const auto tcuFormat	= mapVkFormat(format);
3280 
3281 	const auto iWidth		= static_cast<int>(m_params->width);
3282 	const auto iHeight		= static_cast<int>(m_params->height);
3283 
3284 	m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
3285 
3286 	const auto access		= m_referenceLevel->getAccess();
3287 	const auto quadColors	= getQuadrantColors();
3288 	const auto clearColor	= getClearColor();
3289 
3290 	// Each image quadrant gets a different color.
3291 	tcu::clear(access, clearColor);
3292 
3293 	for (int y = 0; y < iHeight; ++y)
3294 	for (int x = 0; x < iWidth; ++x)
3295 	{
3296 		for (const auto& quadrant : quadColors)
3297 		{
3298 			const auto minX = quadrant.first.offset.x;
3299 			const auto minY = quadrant.first.offset.y;
3300 			const auto maxX = quadrant.first.offset.x + static_cast<int32_t>(quadrant.first.extent.width);
3301 			const auto maxY = quadrant.first.offset.y + static_cast<int32_t>(quadrant.first.extent.height);
3302 
3303 			if (x >= minX && x < maxX && y >= minY && y < maxY)
3304 				access.setPixel(quadrant.second, x, y);
3305 		}
3306 	}
3307 }
3308 
iterate()3309 tcu::TestStatus MixedPipelinesInstance::iterate ()
3310 {
3311 	const auto params = dynamic_cast<const MixedPipelinesParams*>(m_params);
3312 	DE_ASSERT(params);
3313 
3314 	const auto&		vkd			= m_context.getDeviceInterface();
3315 	const auto		device		= m_context.getDevice();
3316 	auto&			alloc		= m_context.getDefaultAllocator();
3317 	const auto		queueIndex	= m_context.getUniversalQueueFamilyIndex();
3318 	const auto		queue		= m_context.getUniversalQueue();
3319 
3320 	const auto		dynTopo		= params->dynamicTopology;
3321 	const auto		imageFormat	= getOutputFormat();
3322 	const auto		tcuFormat	= mapVkFormat(imageFormat);
3323 	const auto		imageExtent	= makeExtent3D(m_params->width, m_params->height, 1u);
3324 	const auto		imageUsage	= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3325 
3326 	const VkImageCreateInfo colorBufferInfo =
3327 	{
3328 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
3329 		nullptr,								//	const void*				pNext;
3330 		0u,										//	VkImageCreateFlags		flags;
3331 		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
3332 		imageFormat,							//	VkFormat				format;
3333 		imageExtent,							//	VkExtent3D				extent;
3334 		1u,										//	uint32_t				mipLevels;
3335 		1u,										//	uint32_t				arrayLayers;
3336 		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
3337 		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
3338 		imageUsage,								//	VkImageUsageFlags		usage;
3339 		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
3340 		0u,										//	uint32_t				queueFamilyIndexCount;
3341 		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
3342 		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
3343 	};
3344 
3345 	// Create color image and view.
3346 	ImageWithMemory	colorImage	(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
3347 	const auto		colorSRR	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
3348 	const auto		colorSRL	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
3349 	const auto		colorView	= makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
3350 
3351 	// Create a memory buffer for verification.
3352 	const auto			verificationBufferSize	= static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
3353 	const auto			verificationBufferUsage	= (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3354 	const auto			verificationBufferInfo	= makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
3355 
3356 	BufferWithMemory	verificationBuffer		(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
3357 	auto&				verificationBufferAlloc	= verificationBuffer.getAllocation();
3358 	void*				verificationBufferData	= verificationBufferAlloc.getHostPtr();
3359 
3360 	// Pipeline layouts for the mesh and classic pipelines.
3361 	const auto pcSize					= static_cast<uint32_t>(sizeof(tcu::Vec4));
3362 	const auto pcRange					= makePushConstantRange(VK_SHADER_STAGE_FRAGMENT_BIT, 0u, pcSize);
3363 	const auto classicPipelineLayout	= makePipelineLayout(vkd, device, DE_NULL, &pcRange);
3364 	const auto meshPipelineLayout		= makePipelineLayout(vkd, device, DE_NULL, &pcRange);
3365 
3366 	// Shader modules.
3367 	const auto&	binaries	= m_context.getBinaryCollection();
3368 	const auto	vertShader	= createShaderModule(vkd, device, binaries.get("vert"));
3369 	const auto	meshShader	= createShaderModule(vkd, device, binaries.get("mesh"));
3370 	const auto	fragShader	= createShaderModule(vkd, device, binaries.get("frag"));
3371 
3372 	// Render pass.
3373 	const auto renderPass = makeRenderPass(vkd, device, imageFormat);
3374 
3375 	// Framebuffer.
3376 	const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);
3377 
3378 	// Viewport and scissor.
3379 	const std::vector<VkViewport>	viewports	(1u, makeViewport(imageExtent));
3380 	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(imageExtent));
3381 
3382 	// Color blending.
3383 	const auto									colorWriteMask	= (VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT);
3384 	const VkPipelineColorBlendAttachmentState	blendAttState	=
3385 	{
3386 		VK_TRUE,				//	VkBool32				blendEnable;
3387 		VK_BLEND_FACTOR_ONE,	//	VkBlendFactor			srcColorBlendFactor;
3388 		VK_BLEND_FACTOR_ONE,	//	VkBlendFactor			dstColorBlendFactor;
3389 		VK_BLEND_OP_ADD,		//	VkBlendOp				colorBlendOp;
3390 		VK_BLEND_FACTOR_ONE,	//	VkBlendFactor			srcAlphaBlendFactor;
3391 		VK_BLEND_FACTOR_ONE,	//	VkBlendFactor			dstAlphaBlendFactor;
3392 		VK_BLEND_OP_ADD,		//	VkBlendOp				alphaBlendOp;
3393 		colorWriteMask,			//	VkColorComponentFlags	colorWriteMask;
3394 	};
3395 
3396 	const VkPipelineColorBlendStateCreateInfo colorBlendInfo =
3397 	{
3398 		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,	//	VkStructureType								sType;
3399 		nullptr,													//	const void*									pNext;
3400 		0u,															//	VkPipelineColorBlendStateCreateFlags		flags;
3401 		VK_FALSE,													//	VkBool32									logicOpEnable;
3402 		VK_LOGIC_OP_OR,												//	VkLogicOp									logicOp;
3403 		1u,															//	uint32_t									attachmentCount;
3404 		&blendAttState,												//	const VkPipelineColorBlendAttachmentState*	pAttachments;
3405 		{ 0.0f, 0.0f, 0.0f, 0.0f },									//	float										blendConstants[4];
3406 	};
3407 
3408 	const std::vector<VkDynamicState>	meshDynamicStates		{ VK_DYNAMIC_STATE_SCISSOR };
3409 	std::vector<VkDynamicState>			classicDynamicStates	(meshDynamicStates);
3410 	if (dynTopo)
3411 		classicDynamicStates.push_back(VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT);
3412 
3413 	const VkPipelineDynamicStateCreateInfo meshDynamicStateInfo =
3414 	{
3415 		VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,	//	VkStructureType						sType;
3416 		nullptr,												//	const void*							pNext;
3417 		0u,														//	VkPipelineDynamicStateCreateFlags	flags;
3418 		static_cast<uint32_t>(meshDynamicStates.size()),		//	uint32_t							dynamicStateCount;
3419 		de::dataOrNull(meshDynamicStates),						//	const VkDynamicState*				pDynamicStates;
3420 	};
3421 	const VkPipelineDynamicStateCreateInfo	classicDynamicStateInfo	=
3422 	{
3423 		VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,	//	VkStructureType						sType;
3424 		nullptr,												//	const void*							pNext;
3425 		0u,														//	VkPipelineDynamicStateCreateFlags	flags;
3426 		static_cast<uint32_t>(classicDynamicStates.size()),		//	uint32_t							dynamicStateCount;
3427 		de::dataOrNull(classicDynamicStates),					//	const VkDynamicState*				pDynamicStates;
3428 	};
3429 
3430 	const auto meshPipeline = makeGraphicsPipeline(vkd, device, meshPipelineLayout.get(),
3431 		DE_NULL, meshShader.get(), fragShader.get(),
3432 		renderPass.get(), viewports, scissors, 0u/*subpass*/,
3433 		nullptr, nullptr, nullptr, &colorBlendInfo, &meshDynamicStateInfo);
3434 
3435 	const VkPipelineVertexInputStateCreateInfo vertexInputInfo = initVulkanStructure();
3436 
3437 	const auto staticTopo		= (dynTopo ? VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP);
3438 	const auto classicPipeline	= makeGraphicsPipeline(vkd, device, classicPipelineLayout.get(),
3439 		vertShader.get(), DE_NULL, DE_NULL, DE_NULL, fragShader.get(),
3440 		renderPass.get(), viewports, scissors, staticTopo, 0u/*subpass*/, 0u/*patchControlPoints*/,
3441 			&vertexInputInfo, nullptr, nullptr, nullptr, nullptr, &classicDynamicStateInfo);
3442 
3443 	// Command pool and buffer.
3444 	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
3445 	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3446 	const auto cmdBuffer	= cmdBufferPtr.get();
3447 
3448 	// Pipeline list.
3449 	beginCommandBuffer(vkd, cmdBuffer);
3450 
3451 	// Run pipeline.
3452 	const auto clearColor	= getClearColor();
3453 	const auto drawCount	= m_params->drawCount();
3454 	const auto quadColors	= getQuadrantColors();
3455 	DE_ASSERT(drawCount.x() == 1u && drawCount.y() == 1u && drawCount.z() == 1u);
3456 
3457 	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
3458 	for (size_t idx = 0u; idx < quadColors.size(); ++idx)
3459 	{
3460 		const auto& rectColor = quadColors.at(idx);
3461 		vkd.cmdSetScissor(cmdBuffer, 0u, 1u, &rectColor.first);
3462 
3463 		if (idx % 2u == 0u)
3464 		{
3465 			vkd.cmdPushConstants(cmdBuffer, classicPipelineLayout.get(), VK_SHADER_STAGE_FRAGMENT_BIT, 0u, pcSize, &rectColor.second);
3466 			if (dynTopo)
3467 				vkd.cmdSetPrimitiveTopology(cmdBuffer, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP);
3468 			vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, classicPipeline.get());
3469 			vkd.cmdDraw(cmdBuffer, 4u, 1u, 0u, 0u);
3470 		}
3471 		else
3472 		{
3473 			vkd.cmdPushConstants(cmdBuffer, meshPipelineLayout.get(), VK_SHADER_STAGE_FRAGMENT_BIT, 0u, pcSize, &rectColor.second);
3474 			vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, meshPipeline.get());
3475 			vkd.cmdDrawMeshTasksEXT(cmdBuffer, drawCount.x(), drawCount.y(), drawCount.z());
3476 		}
3477 	}
3478 	endRenderPass(vkd, cmdBuffer);
3479 
3480 	// Copy color buffer to verification buffer.
3481 	const auto colorAccess		= (VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT);
3482 	const auto transferRead		= VK_ACCESS_TRANSFER_READ_BIT;
3483 	const auto transferWrite	= VK_ACCESS_TRANSFER_WRITE_BIT;
3484 	const auto hostRead			= VK_ACCESS_HOST_READ_BIT;
3485 
3486 	const auto preCopyBarrier	= makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
3487 	const auto postCopyBarrier	= makeMemoryBarrier(transferWrite, hostRead);
3488 	const auto copyRegion		= makeBufferImageCopy(imageExtent, colorSRL);
3489 
3490 	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
3491 	vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, verificationBuffer.get(), 1u, &copyRegion);
3492 	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postCopyBarrier, 0u, nullptr, 0u, nullptr);
3493 
3494 	endCommandBuffer(vkd, cmdBuffer);
3495 	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
3496 
3497 	// Generate reference image and compare results.
3498 	const tcu::IVec3					iExtent				(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
3499 	const tcu::ConstPixelBufferAccess	verificationAccess	(tcuFormat, iExtent, verificationBufferData);
3500 
3501 	generateReferenceLevel();
3502 	invalidateAlloc(vkd, device, verificationBufferAlloc);
3503 	if (!verifyResult(verificationAccess))
3504 		TCU_FAIL("Result does not match reference; check log for details");
3505 
3506 	return tcu::TestStatus::pass("Pass");
3507 }
3508 
3509 // Tests to check SetMeshOutputsEXT() and EmitMeshTasksEXT() take values from the first invocation.
3510 class FirstInvocationCase : public MeshShaderMiscCase
3511 {
3512 public:
FirstInvocationCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)3513 					FirstInvocationCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
3514 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
3515 					{}
3516 
3517 	void			checkSupport			(Context& context) const override;
3518 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
3519 	TestInstance*	createInstance			(Context& context) const override;
3520 
3521 	static constexpr uint32_t kColoredPixels = 120u;
3522 };
3523 
3524 class FirstInvocationInstance : public MeshShaderMiscInstance
3525 {
3526 public:
FirstInvocationInstance(Context & context,const MiscTestParams * params)3527 	FirstInvocationInstance (Context& context, const MiscTestParams* params)
3528 		: MeshShaderMiscInstance (context, params)
3529 	{}
3530 
3531 	void	generateReferenceLevel	() override;
3532 };
3533 
generateReferenceLevel()3534 void FirstInvocationInstance::generateReferenceLevel ()
3535 {
3536 	DE_ASSERT(m_params->height == 1u && m_params->width == 128u);
3537 	DE_ASSERT(FirstInvocationCase::kColoredPixels < m_params->width);
3538 
3539 	const auto format		= getOutputFormat();
3540 	const auto tcuFormat	= mapVkFormat(format);
3541 
3542 	const auto iWidth		= static_cast<int>(m_params->width);
3543 	const auto iHeight		= static_cast<int>(m_params->height);
3544 
3545 	m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
3546 
3547 	const auto clearColor	= tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f);
3548 	const auto geomColor	= tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f);
3549 	const auto access		= m_referenceLevel->getAccess();
3550 
3551 	// Fill the expected amount of colored pixels with solid color.
3552 	for (int i = 0; i < iWidth; ++i)
3553 	{
3554 		const auto& color = ((static_cast<uint32_t>(i) < FirstInvocationCase::kColoredPixels) ? geomColor : clearColor);
3555 		access.setPixel(color, i, 0);
3556 	}
3557 }
3558 
createInstance(Context & context) const3559 TestInstance* FirstInvocationCase::createInstance (Context& context) const
3560 {
3561 	return new FirstInvocationInstance(context, m_params.get());
3562 }
3563 
checkSupport(Context & context) const3564 void FirstInvocationCase::checkSupport (Context &context) const
3565 {
3566 	MeshShaderMiscCase::checkSupport(context);
3567 
3568 	if (context.getUsedApiVersion() < VK_MAKE_VERSION(1, 1, 0))
3569 		TCU_THROW(NotSupportedError, "Vulkan API version >= 1.1 required");
3570 
3571 	const auto &subgroupProperties = context.getSubgroupProperties();
3572 	if (!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BASIC_BIT))
3573 		TCU_THROW(NotSupportedError, "Subgroup basic features not supported");
3574 }
3575 
initPrograms(vk::SourceCollections & programCollection) const3576 void FirstInvocationCase::initPrograms (vk::SourceCollections& programCollection) const
3577 {
3578 	DE_ASSERT(m_params->height == 1u && m_params->width == 128u);
3579 	DE_ASSERT(kColoredPixels < m_params->width);
3580 
3581 	// Add generic fragment shader.
3582 	MeshShaderMiscCase::initPrograms(programCollection);
3583 
3584 	const bool					useTask			= m_params->needsTaskShader();
3585 	const auto					fbWidth			= m_params->width;
3586 	const auto					meshLocalSize	= (useTask ? 1u : fbWidth);
3587 	const auto					taskLocalSize	= fbWidth;
3588 	const auto					pointsPerMeshWG	= (useTask ? 1u : kColoredPixels);
3589 	const auto					jobID			= (useTask ? "gl_WorkGroupID.x" : "gl_LocalInvocationIndex");
3590 	const auto					buildOptions	= getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
3591 
3592 	std::string taskDataDecl;
3593 	if (useTask)
3594 	{
3595 		std::ostringstream aux;
3596 		aux
3597 			<< "struct TaskData {\n"
3598 			<< "    uint values[" << taskLocalSize << "];\n"
3599 			<< "};\n"
3600 			<< "taskPayloadSharedEXT TaskData td;\n"
3601 			;
3602 		taskDataDecl = aux.str();
3603 	}
3604 
3605 	if (useTask)
3606 	{
3607 		std::ostringstream task;
3608 		task
3609 			<< "#version 450\n"
3610 			<< "#extension GL_EXT_mesh_shader : enable\n"
3611 			<< "#extension GL_KHR_shader_subgroup_basic : enable\n"
3612 			<< "\n"
3613 			<< "layout(local_size_x=" << taskLocalSize << ", local_size_y=1, local_size_z=1) in;\n"
3614 			<< "\n"
3615 			<< taskDataDecl
3616 			<< "\n"
3617 			<< "void main ()\n"
3618 			<< "{\n"
3619 			<< "    td.values[gl_LocalInvocationIndex] = gl_LocalInvocationIndex * 2u;\n"
3620 			<< "\n"
3621 			<< "    uint total_jobs = max(" << kColoredPixels << " / 2u, 1u);\n"
3622 			<< "    if (gl_LocalInvocationIndex == 0u) {\n"
3623 			<< "        total_jobs = " << kColoredPixels << ";\n"
3624 			<< "    } else if (gl_SubgroupID > 0u) {\n"
3625 			<< "        total_jobs = max(" << kColoredPixels << " / 4u, 1u);\n"
3626 			<< "    }\n"
3627 			<< "\n"
3628 			<< "    EmitMeshTasksEXT(total_jobs, 1u, 1u);\n"
3629 			<< "}\n"
3630 			;
3631 
3632 		programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
3633 	}
3634 
3635 	{
3636 		std::ostringstream mesh;
3637 		mesh
3638 			<< "#version 450\n"
3639 			<< "#extension GL_EXT_mesh_shader : enable\n"
3640 			<< "#extension GL_KHR_shader_subgroup_basic : enable\n"
3641 			<< "\n"
3642 			<< "layout(local_size_x=" << meshLocalSize << ", local_size_y=1, local_size_z=1) in;\n"
3643 			<< "layout(points) out;\n"
3644 			<< "layout(max_primitives=" << meshLocalSize << ", max_vertices=" << meshLocalSize << ") out;\n"
3645 			<< "\n"
3646 			<< "layout (location=0) out perprimitiveEXT vec4 pointColor[];\n"
3647 			<< taskDataDecl
3648 			<< "\n"
3649 			<< "void main ()\n"
3650 			<< "{\n"
3651 			<< "    uint total_points = max(" << pointsPerMeshWG << " / 2u, 1u);\n"
3652 			<< "    \n"
3653 			;
3654 
3655 		if (!useTask)
3656 		{
3657 			mesh
3658 				<< "    if (gl_LocalInvocationIndex == 0u) {\n"
3659 				<< "        total_points = " << pointsPerMeshWG << ";\n"
3660 				<< "    } else if (gl_SubgroupID > 0u) {\n"
3661 				<< "        total_points = max(" << pointsPerMeshWG << " / 4u, 1u);\n"
3662 				<< "    }\n"
3663 				<< "    \n"
3664 				;
3665 		}
3666 
3667 		mesh
3668 			<< "    SetMeshOutputsEXT(total_points, total_points);\n"
3669 			<< "    if (gl_LocalInvocationIndex < " << pointsPerMeshWG << ") {\n"
3670 			<< "        gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 1.0;\n"
3671 			<< "        gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(((float(" << jobID << ") + 0.5) / " << fbWidth << ") * 2.0 - 1.0, 0.0, 0.0, 1.0);\n"
3672 			<< "        gl_PrimitivePointIndicesEXT[gl_LocalInvocationIndex] = gl_LocalInvocationIndex;\n"
3673 			<< "        pointColor[gl_LocalInvocationIndex] = vec4(0.0, 0.0, 1.0, 1.0);\n"
3674 			<< "    }\n"
3675 			<< "}\n"
3676 			;
3677 
3678 		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
3679 	}
3680 }
3681 
3682 // Tests that check LocalSizeId works as expected.
3683 class LocalSizeIdCase : public MeshShaderMiscCase
3684 {
3685 public:
LocalSizeIdCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)3686 					LocalSizeIdCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
3687 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
3688 					{}
3689 
3690 	void			checkSupport			(Context& context) const override;
3691 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
3692 	TestInstance*	createInstance			(Context& context) const override;
3693 };
3694 
3695 class LocalSizeIdInstance : public MeshShaderMiscInstance
3696 {
3697 public:
LocalSizeIdInstance(Context & context,const MiscTestParams * params)3698 	LocalSizeIdInstance (Context& context, const MiscTestParams* params)
3699 		: MeshShaderMiscInstance (context, params)
3700 	{}
3701 
3702 	void			generateReferenceLevel	() override;
3703 	tcu::TestStatus	iterate					() override;
3704 };
3705 
createInstance(Context & context) const3706 TestInstance* LocalSizeIdCase::createInstance (Context& context) const
3707 {
3708 	return new LocalSizeIdInstance(context, m_params.get());
3709 }
3710 
generateReferenceLevel()3711 void LocalSizeIdInstance::generateReferenceLevel ()
3712 {
3713 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
3714 }
3715 
checkSupport(Context & context) const3716 void LocalSizeIdCase::checkSupport (Context &context) const
3717 {
3718 	// Generic checks.
3719 	MeshShaderMiscCase::checkSupport(context);
3720 
3721 	// Needed for LocalSizeId.
3722 	context.requireDeviceFunctionality("VK_KHR_maintenance4");
3723 }
3724 
initPrograms(vk::SourceCollections & programCollection) const3725 void LocalSizeIdCase::initPrograms (vk::SourceCollections& programCollection) const
3726 {
3727 	const SpirVAsmBuildOptions	spvOptions	(programCollection.usedVulkanVersion, SPIRV_VERSION_1_5, false/*allowSpirv14*/, true/*allowMaintenance4*/);
3728 	const auto					useTask		= m_params->needsTaskShader();
3729 
3730 	DE_ASSERT(m_params->height == 1u && m_params->width == 32u);
3731 
3732 	// Add generic fragment shader.
3733 	MeshShaderMiscCase::initPrograms(programCollection);
3734 
3735 	if (useTask)
3736 	{
3737 		// Roughly equivalent to the following shader.
3738 		//	#version 450
3739 		//	#extension GL_EXT_mesh_shader : enable
3740 		//
3741 		//	layout(local_size_x_id=10, local_size_y_id=11, local_size_z_id=12) in;
3742 		//	struct TaskData {
3743 		//	    uint pixelID[32];
3744 		//	};
3745 		//	taskPayloadSharedEXT TaskData td;
3746 		//
3747 		//	void main ()
3748 		//	{
3749 		//	    td.pixelID[gl_LocalInvocationIndex] = gl_LocalInvocationIndex;
3750 		//	    EmitMeshTasksEXT(1u, 1u, 1u);
3751 		//	}
3752 
3753 		std::ostringstream taskSPV;
3754 		taskSPV
3755 			<< "      ; SPIR-V\n"
3756 			<< "      ; Version: 1.0\n"
3757 			<< "      ; Generator: Khronos Glslang Reference Front End; 10\n"
3758 			<< "      ; Bound: 26\n"
3759 			<< "      ; Schema: 0\n"
3760 			<< "      OpCapability MeshShadingEXT\n"
3761 			<< "      OpExtension \"SPV_EXT_mesh_shader\"\n"
3762 			<< " %1 = OpExtInstImport \"GLSL.std.450\"\n"
3763 			<< "      OpMemoryModel Logical GLSL450\n"
3764 			<< "      OpEntryPoint TaskEXT %4 \"main\" %11 %15\n"
3765 			<< "      OpExecutionModeId %4 LocalSizeId %21 %22 %23\n"
3766 			<< "      OpDecorate %15 BuiltIn LocalInvocationIndex\n"
3767 			<< "      OpDecorate %21 SpecId 10\n"
3768 			<< "      OpDecorate %22 SpecId 11\n"
3769 			<< "      OpDecorate %23 SpecId 12\n"
3770 			<< " %2 = OpTypeVoid\n"
3771 			<< " %3 = OpTypeFunction %2\n"
3772 			<< " %6 = OpTypeInt 32 0\n"
3773 			<< " %7 = OpConstant %6 32\n"
3774 			<< " %8 = OpTypeArray %6 %7\n"
3775 			<< " %9 = OpTypeStruct %8\n"
3776 			<< "%10 = OpTypePointer TaskPayloadWorkgroupEXT %9\n"
3777 			<< "%11 = OpVariable %10 TaskPayloadWorkgroupEXT\n"
3778 			<< "%12 = OpTypeInt 32 1\n"
3779 			<< "%13 = OpConstant %12 0\n"
3780 			<< "%14 = OpTypePointer Input %6\n"
3781 			<< "%15 = OpVariable %14 Input\n"
3782 			<< "%18 = OpTypePointer TaskPayloadWorkgroupEXT %6\n"
3783 			<< "%20 = OpConstant %6 1\n"
3784 			<< "%21 = OpSpecConstant %6 1\n"
3785 			<< "%22 = OpSpecConstant %6 1\n"
3786 			<< "%23 = OpSpecConstant %6 1\n"
3787 			<< " %4 = OpFunction %2 None %3\n"
3788 			<< " %5 = OpLabel\n"
3789 			<< "%16 = OpLoad %6 %15\n"
3790 			<< "%17 = OpLoad %6 %15\n"
3791 			<< "%19 = OpAccessChain %18 %11 %13 %16\n"
3792 			<< "      OpStore %19 %17\n"
3793 			<< "      OpEmitMeshTasksEXT %20 %20 %20 %11\n"
3794 			<< "      OpFunctionEnd\n"
3795 			;
3796 
3797 		programCollection.spirvAsmSources.add("task") << taskSPV.str() << spvOptions;
3798 	}
3799 
3800 	{
3801 		// Roughly equivalent to the following shader.
3802 		//	#version 450
3803 		//	#extension GL_EXT_mesh_shader : enable
3804 		//
3805 		//	layout(local_size_x_id=20, local_size_y_id=21, local_size_z_id=22) in;
3806 		//	layout(points) out;
3807 		//	layout(max_primitives=32, max_vertices=32) out;
3808 		//
3809 		//	layout (location=0) out perprimitiveEXT vec4 pointColor[];
3810 		//#if useTask
3811 		//	struct TaskData {
3812 		//	    uint pixelID[32];
3813 		//	};
3814 		//	taskPayloadSharedEXT TaskData td;
3815 		//#endif
3816 		//
3817 		//	void main ()
3818 		//	{
3819 		//#if useTask
3820 		//	    const uint pixelId = td.pixelID[gl_LocalInvocationIndex];
3821 		//#else
3822 		//	    const uint pixelId = gl_LocalInvocationIndex;
3823 		//#endif
3824 		//	    SetMeshOutputsEXT(32u, 32u);
3825 		//	    gl_MeshVerticesEXT[pixelId].gl_PointSize = 1.0;
3826 		//	    gl_MeshVerticesEXT[pixelId].gl_Position = vec4(((float(pixelId) + 0.5) / 32.0) * 2.0 - 1.0, 0.0, 0.0, 1.0);
3827 		//	    gl_PrimitivePointIndicesEXT[pixelId] = pixelId;
3828 		//	    pointColor[pixelId] = vec4(0.0, 0.0, 1.0, 1.0);
3829 		//	}
3830 		std::ostringstream meshSPV;
3831 		meshSPV
3832 			<< "                              OpCapability MeshShadingEXT\n"
3833 			<< "                              OpExtension \"SPV_EXT_mesh_shader\"\n"
3834 			<< "                         %1 = OpExtInstImport \"GLSL.std.450\"\n"
3835 			<< "                              OpMemoryModel Logical GLSL450\n"
3836 			<< "                              OpEntryPoint MeshEXT %main \"main\" %local_invocation_index %mesh_vertices %primitive_point_indices %primitive_colors" << (useTask ? " %task_data" : "") << "\n"
3837 			<< "                              OpExecutionModeId %main LocalSizeId %constand_id_20 %constant_id_21 %constant_id_22\n"
3838 			<< "                              OpExecutionMode %main OutputVertices 32\n"
3839 			<< "                              OpExecutionMode %main OutputPrimitivesNV 32\n"
3840 			<< "                              OpExecutionMode %main OutputPoints\n"
3841 			<< "                              OpDecorate %local_invocation_index BuiltIn LocalInvocationIndex\n"
3842 			<< "                              OpMemberDecorate %mesh_vertices_struct 0 BuiltIn Position\n"
3843 			<< "                              OpMemberDecorate %mesh_vertices_struct 1 BuiltIn PointSize\n"
3844 			<< "                              OpMemberDecorate %mesh_vertices_struct 2 BuiltIn ClipDistance\n"
3845 			<< "                              OpMemberDecorate %mesh_vertices_struct 3 BuiltIn CullDistance\n"
3846 			<< "                              OpDecorate %mesh_vertices_struct Block\n"
3847 			<< "                              OpDecorate %primitive_point_indices BuiltIn PrimitivePointIndicesEXT\n"
3848 			<< "                              OpDecorate %primitive_colors PerPrimitiveEXT\n"
3849 			<< "                              OpDecorate %primitive_colors Location 0\n"
3850 			<< "                              OpDecorate %constand_id_20 SpecId 20\n"
3851 			<< "                              OpDecorate %constant_id_21 SpecId 21\n"
3852 			<< "                              OpDecorate %constant_id_22 SpecId 22\n"
3853 			<< "                 %type_void = OpTypeVoid\n"
3854 			<< "                 %void_func = OpTypeFunction %type_void\n"
3855 			<< "                       %int = OpTypeInt 32 1\n"
3856 			<< "                      %uint = OpTypeInt 32 0\n"
3857 			<< "                     %float = OpTypeFloat 32\n"
3858 			<< "                      %vec4 = OpTypeVector %float 4\n"
3859 			<< "                     %uvec3 = OpTypeVector %uint 3\n"
3860 			<< "                     %int_0 = OpConstant %int 0\n"
3861 			<< "                     %int_1 = OpConstant %int 1\n"
3862 			<< "                    %uint_1 = OpConstant %uint 1\n"
3863 			<< "                   %uint_32 = OpConstant %uint 32\n"
3864 			<< "                   %float_0 = OpConstant %float 0\n"
3865 			<< "                   %float_1 = OpConstant %float 1\n"
3866 			<< "                 %float_0_5 = OpConstant %float 0.5\n"
3867 			<< "                  %float_32 = OpConstant %float 32\n"
3868 			<< "                   %float_2 = OpConstant %float 2\n"
3869 			<< "             %float_array_1 = OpTypeArray %float %uint_1\n"
3870 			<< "             %func_uint_ptr = OpTypePointer Function %uint\n"
3871 			<< "            %input_uint_ptr = OpTypePointer Input %uint\n"
3872 			<< "    %local_invocation_index = OpVariable %input_uint_ptr Input\n"
3873 			<< "      %mesh_vertices_struct = OpTypeStruct %vec4 %float %float_array_1 %float_array_1\n"
3874 			<< "       %mesh_vertices_array = OpTypeArray %mesh_vertices_struct %uint_32\n"
3875 			<< "     %mesh_vertices_out_ptr = OpTypePointer Output %mesh_vertices_array\n"
3876 			<< "             %mesh_vertices = OpVariable %mesh_vertices_out_ptr Output\n"
3877 			<< "          %output_float_ptr = OpTypePointer Output %float\n"
3878 			<< "           %output_vec4_ptr = OpTypePointer Output %vec4\n"
3879 			<< "             %uint_array_32 = OpTypeArray %uint %uint_32\n"
3880 			<< "\n"
3881 			;
3882 
3883 		if (useTask)
3884 		{
3885 			meshSPV
3886 				<< "\n"
3887 				<< "%uint_array_32_struct                  = OpTypeStruct %uint_array_32\n"
3888 				<< "%task_payload_uint_array_32_struct_ptr = OpTypePointer TaskPayloadWorkgroupEXT %uint_array_32_struct\n"
3889 				<< "%task_data                             = OpVariable %task_payload_uint_array_32_struct_ptr TaskPayloadWorkgroupEXT\n"
3890 				<< "%task_payload_uint_ptr                 = OpTypePointer TaskPayloadWorkgroupEXT %uint\n"
3891 				<< "\n"
3892 				;
3893 		}
3894 
3895 		meshSPV
3896 			<< "  %output_uint_array_32_ptr = OpTypePointer Output %uint_array_32\n"
3897 			<< "   %primitive_point_indices = OpVariable %output_uint_array_32_ptr Output\n"
3898 			<< "           %output_uint_ptr = OpTypePointer Output %uint\n"
3899 			<< "             %vec4_array_32 = OpTypeArray %vec4 %uint_32\n"
3900 			<< "  %output_vec4_array_32_ptr = OpTypePointer Output %vec4_array_32\n"
3901 			<< "          %primitive_colors = OpVariable %output_vec4_array_32_ptr Output\n"
3902 			<< "                      %blue = OpConstantComposite %vec4 %float_0 %float_0 %float_1 %float_1\n"
3903 			<< "            %constand_id_20 = OpSpecConstant %uint 1\n"
3904 			<< "            %constant_id_21 = OpSpecConstant %uint 1\n"
3905 			<< "            %constant_id_22 = OpSpecConstant %uint 1\n"
3906 			<< "                      %main = OpFunction %type_void None %void_func\n"
3907 			<< "                %main_label = OpLabel\n"
3908 			<< "                  %pixel_id = OpVariable %func_uint_ptr Function\n"
3909 			<< "%local_invocation_index_val = OpLoad %uint %local_invocation_index\n"
3910 			;
3911 
3912 		if (useTask)
3913 		{
3914 			meshSPV
3915 				<< "           %td_pixel_id_ptr = OpAccessChain %task_payload_uint_ptr %task_data %int_0 %local_invocation_index_val\n"
3916 				<< "           %td_pixel_id_val = OpLoad %uint %td_pixel_id_ptr\n"
3917 				<< "                              OpStore %pixel_id %td_pixel_id_val\n"
3918 				;
3919 		}
3920 		else
3921 		{
3922 			meshSPV << "                              OpStore %pixel_id %local_invocation_index_val\n";
3923 		}
3924 
3925 		meshSPV
3926 			<< "                              OpSetMeshOutputsEXT %uint_32 %uint_32\n"
3927 			<< "              %pixel_id_val = OpLoad %uint %pixel_id\n"
3928 			<< "                %point_size = OpAccessChain %output_float_ptr %mesh_vertices %pixel_id_val %int_1\n"
3929 			<< "                              OpStore %point_size %float_1\n"
3930 			<< "        %pixel_id_val_float = OpConvertUToF %float %pixel_id_val\n"
3931 			<< "       %pixel_id_val_center = OpFAdd %float %pixel_id_val_float %float_0_5\n"
3932 			<< "                   %x_unorm = OpFDiv %float %pixel_id_val_center %float_32\n"
3933 			<< "                 %x_unorm_2 = OpFMul %float %x_unorm %float_2\n"
3934 			<< "                    %x_norm = OpFSub %float %x_unorm_2 %float_1\n"
3935 			<< "                 %point_pos = OpCompositeConstruct %vec4 %x_norm %float_0 %float_0 %float_1\n"
3936 			<< "           %gl_position_ptr = OpAccessChain %output_vec4_ptr %mesh_vertices %pixel_id_val %int_0\n"
3937 			<< "                              OpStore %gl_position_ptr %point_pos\n"
3938 			<< "           %point_index_ptr = OpAccessChain %output_uint_ptr %primitive_point_indices %pixel_id_val\n"
3939 			<< "                              OpStore %point_index_ptr %pixel_id_val\n"
3940 			<< "           %point_color_ptr = OpAccessChain %output_vec4_ptr %primitive_colors %pixel_id_val\n"
3941 			<< "                              OpStore %point_color_ptr %blue\n"
3942 			<< "                              OpReturn\n"
3943 			<< "                              OpFunctionEnd\n"
3944 			;
3945 
3946 		programCollection.spirvAsmSources.add("mesh") << meshSPV.str() << spvOptions;
3947 	}
3948 }
3949 
makePipelineShaderStageCreateInfo(VkShaderStageFlagBits stage,VkShaderModule module,const VkSpecializationInfo * pSpecializationInfo=nullptr)3950 VkPipelineShaderStageCreateInfo makePipelineShaderStageCreateInfo (VkShaderStageFlagBits stage, VkShaderModule module, const VkSpecializationInfo* pSpecializationInfo = nullptr)
3951 {
3952 	const VkPipelineShaderStageCreateInfo stageInfo =
3953 	{
3954 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	//	VkStructureType						sType;
3955 		nullptr,												//	const void*							pNext;
3956 		0u,														//	VkPipelineShaderStageCreateFlags	flags;
3957 		stage,													//	VkShaderStageFlagBits				stage;
3958 		module,													//	VkShaderModule						module;
3959 		"main",													//	const char*							pName;
3960 		pSpecializationInfo,									//	const VkSpecializationInfo*			pSpecializationInfo;
3961 	};
3962 	return stageInfo;
3963 }
3964 
iterate()3965 tcu::TestStatus LocalSizeIdInstance::iterate ()
3966 {
3967 	const auto&		vkd			= m_context.getDeviceInterface();
3968 	const auto		device		= m_context.getDevice();
3969 	auto&			alloc		= m_context.getDefaultAllocator();
3970 	const auto		queueIndex	= m_context.getUniversalQueueFamilyIndex();
3971 	const auto		queue		= m_context.getUniversalQueue();
3972 
3973 	const auto		imageFormat	= getOutputFormat();
3974 	const auto		tcuFormat	= mapVkFormat(imageFormat);
3975 	const auto		imageExtent	= makeExtent3D(m_params->width, m_params->height, 1u);
3976 	const auto		imageUsage	= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3977 
3978 	const auto&		binaries	= m_context.getBinaryCollection();
3979 	const auto		hasTask		= binaries.contains("task");
3980 
3981 	const VkImageCreateInfo colorBufferInfo =
3982 	{
3983 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
3984 		nullptr,								//	const void*				pNext;
3985 		0u,										//	VkImageCreateFlags		flags;
3986 		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
3987 		imageFormat,							//	VkFormat				format;
3988 		imageExtent,							//	VkExtent3D				extent;
3989 		1u,										//	uint32_t				mipLevels;
3990 		1u,										//	uint32_t				arrayLayers;
3991 		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
3992 		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
3993 		imageUsage,								//	VkImageUsageFlags		usage;
3994 		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
3995 		0u,										//	uint32_t				queueFamilyIndexCount;
3996 		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
3997 		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
3998 	};
3999 
4000 	// Create color image and view.
4001 	ImageWithMemory	colorImage	(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
4002 	const auto		colorSRR	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
4003 	const auto		colorSRL	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
4004 	const auto		colorView	= makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
4005 
4006 	// Create a memory buffer for verification.
4007 	const auto			verificationBufferSize	= static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
4008 	const auto			verificationBufferUsage	= (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
4009 	const auto			verificationBufferInfo	= makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
4010 
4011 	BufferWithMemory	verificationBuffer		(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
4012 	auto&				verificationBufferAlloc	= verificationBuffer.getAllocation();
4013 	void*				verificationBufferData	= verificationBufferAlloc.getHostPtr();
4014 
4015 	// Pipeline layout.
4016 	const auto pipelineLayout = makePipelineLayout(vkd, device, 0u, nullptr, 0u, nullptr);
4017 
4018 	// Shader modules.
4019 	const auto	meshShader	= createShaderModule(vkd, device, binaries.get("mesh"));
4020 	const auto	fragShader	= createShaderModule(vkd, device, binaries.get("frag"));
4021 
4022 	Move<VkShaderModule> taskShader;
4023 	if (hasTask)
4024 		taskShader = createShaderModule(vkd, device, binaries.get("task"));
4025 
4026 	// Spec constant data (must match shaders).
4027 	const std::vector<uint32_t> scData {
4028 		//	10		11		12		20		21		22
4029 			32u,	1u,		1u,		32u,	1u,		1u
4030 	};
4031 	const auto scSize = static_cast<uint32_t>(sizeof(uint32_t));
4032 	const std::vector<VkSpecializationMapEntry> scMapEntries {
4033 		makeSpecializationMapEntry(10u, 0u * scSize, scSize),
4034 		makeSpecializationMapEntry(11u, 1u * scSize, scSize),
4035 		makeSpecializationMapEntry(12u, 2u * scSize, scSize),
4036 		makeSpecializationMapEntry(20u, 3u * scSize, scSize),
4037 		makeSpecializationMapEntry(21u, 4u * scSize, scSize),
4038 		makeSpecializationMapEntry(22u, 5u * scSize, scSize),
4039 	};
4040 
4041 	const auto scMapInfo = makeSpecializationInfo(
4042 		static_cast<uint32_t>(scMapEntries.size()), de::dataOrNull(scMapEntries),
4043 		static_cast<uint32_t>(de::dataSize(scData)), de::dataOrNull(scData));
4044 
4045 	std::vector<VkPipelineShaderStageCreateInfo> shaderStages;
4046 	shaderStages.push_back(makePipelineShaderStageCreateInfo(VK_SHADER_STAGE_MESH_BIT_EXT, meshShader.get(), &scMapInfo));
4047 	shaderStages.push_back(makePipelineShaderStageCreateInfo(VK_SHADER_STAGE_FRAGMENT_BIT, fragShader.get()));
4048 	if (hasTask)
4049 		shaderStages.push_back(makePipelineShaderStageCreateInfo(VK_SHADER_STAGE_TASK_BIT_EXT, taskShader.get(), &scMapInfo));
4050 
4051 	// Render pass.
4052 	const auto renderPass = makeRenderPass(vkd, device, imageFormat);
4053 
4054 	// Framebuffer.
4055 	const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);
4056 
4057 	// Viewport and scissor.
4058 	const std::vector<VkViewport>	viewports	(1u, makeViewport(imageExtent));
4059 	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(imageExtent));
4060 
4061 	// Pipeline with specialization constants.
4062 	const auto pipeline = makeGraphicsPipeline(vkd, device, DE_NULL, pipelineLayout.get(), 0u, shaderStages, renderPass.get(), viewports, scissors);
4063 
4064 	// Command pool and buffer.
4065 	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
4066 	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4067 	const auto cmdBuffer	= cmdBufferPtr.get();
4068 
4069 	beginCommandBuffer(vkd, cmdBuffer);
4070 
4071 	// Run pipeline.
4072 	const tcu::Vec4	clearColor	(0.0f, 0.0f, 0.0f, 0.0f);
4073 	const auto		drawCount	= m_params->drawCount();
4074 	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
4075 	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
4076 	vkd.cmdDrawMeshTasksEXT(cmdBuffer, drawCount.x(), drawCount.y(), drawCount.z());
4077 	endRenderPass(vkd, cmdBuffer);
4078 
4079 	// Copy color buffer to verification buffer.
4080 	const auto colorAccess		= (VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT);
4081 	const auto transferRead		= VK_ACCESS_TRANSFER_READ_BIT;
4082 	const auto transferWrite	= VK_ACCESS_TRANSFER_WRITE_BIT;
4083 	const auto hostRead			= VK_ACCESS_HOST_READ_BIT;
4084 
4085 	const auto preCopyBarrier	= makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
4086 	const auto postCopyBarrier	= makeMemoryBarrier(transferWrite, hostRead);
4087 	const auto copyRegion		= makeBufferImageCopy(imageExtent, colorSRL);
4088 
4089 	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
4090 	vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, verificationBuffer.get(), 1u, &copyRegion);
4091 	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postCopyBarrier, 0u, nullptr, 0u, nullptr);
4092 
4093 	endCommandBuffer(vkd, cmdBuffer);
4094 	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
4095 
4096 	// Generate reference image and compare results.
4097 	const tcu::IVec3					iExtent				(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
4098 	const tcu::ConstPixelBufferAccess	verificationAccess	(tcuFormat, iExtent, verificationBufferData);
4099 
4100 	generateReferenceLevel();
4101 	invalidateAlloc(vkd, device, verificationBufferAlloc);
4102 	if (!verifyResult(verificationAccess))
4103 		TCU_FAIL("Result does not match reference; check log for details");
4104 
4105 	return tcu::TestStatus::pass("Pass");
4106 }
4107 
4108 // Test multiple task payloads.
4109 class MultipleTaskPayloadsCase : public MeshShaderMiscCase
4110 {
4111 public:
MultipleTaskPayloadsCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)4112 					MultipleTaskPayloadsCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
4113 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
4114 					{
4115 					}
4116 
4117 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
4118 	TestInstance*	createInstance			(Context& context) const override;
4119 
4120 	static constexpr uint32_t kGoodKeyIdx	= 1u;
4121 };
4122 
4123 class MultipleTaskPayloadsInstance : public MeshShaderMiscInstance
4124 {
4125 public:
MultipleTaskPayloadsInstance(Context & context,const MiscTestParams * params)4126 	MultipleTaskPayloadsInstance (Context& context, const MiscTestParams* params)
4127 		: MeshShaderMiscInstance (context, params)
4128 	{}
4129 
4130 	void			generateReferenceLevel	() override;
4131 	tcu::TestStatus	iterate					() override;
4132 };
4133 
createInstance(Context & context) const4134 TestInstance* MultipleTaskPayloadsCase::createInstance (Context& context) const
4135 {
4136 	return new MultipleTaskPayloadsInstance (context, m_params.get());
4137 }
4138 
initPrograms(vk::SourceCollections & programCollection) const4139 void MultipleTaskPayloadsCase::initPrograms (vk::SourceCollections& programCollection) const
4140 {
4141 	DE_ASSERT(m_params->needsTaskShader());
4142 
4143 	const auto					buildOptions	= getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
4144 	const auto					spvBuildOptions	= getMinMeshEXTSpvBuildOptions(programCollection.usedVulkanVersion);
4145 	const std::vector<uint32_t>	keys			{ 3717945376u, 2325956828u, 433982700u };
4146 	//const std::vector<uint32_t> keys { 85u, 170u, 255u };
4147 
4148 	// Generic fragment shader.
4149 	MeshShaderMiscCase::initPrograms(programCollection);
4150 
4151 	const std::string taskDataDecl =
4152 		"struct TaskData {\n"
4153 		"    uint key;\n"
4154 		"};\n"
4155 		"taskPayloadSharedEXT TaskData td;\n"
4156 		;
4157 
4158 	// Idea behind this test: verify that the right payload was passed to the mesh shader and set the geometry color based on that.
4159 	std::ostringstream mesh;
4160 	mesh
4161 		<< "#version 450\n"
4162 		<< "#extension GL_EXT_mesh_shader : enable\n"
4163 		<< "\n"
4164 		<< "layout(local_size_x=1) in;\n"
4165 		<< "layout(triangles) out;\n"
4166 		<< "layout(max_vertices=3, max_primitives=1) out;\n"
4167 		<< "\n"
4168 		<< "layout(location=0) out perprimitiveEXT vec4 triangleColor[];\n"
4169 		<< taskDataDecl
4170 		<< "\n"
4171 		<< "void main ()\n"
4172 		<< "{\n"
4173 		<< "    SetMeshOutputsEXT(3, 1);\n"
4174 		<< "    gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0, -1.0, 0.0f, 1.0f);\n"
4175 		<< "    gl_MeshVerticesEXT[1].gl_Position = vec4( 3.0, -1.0, 0.0f, 1.0f);\n"
4176 		<< "    gl_MeshVerticesEXT[2].gl_Position = vec4(-1.0,  3.0, 0.0f, 1.0f);\n"
4177 		<< "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n"
4178 		<< "    const vec4 color = ((td.key == " << keys[kGoodKeyIdx] << "u) ? vec4(0.0, 0.0, 1.0, 1.0) : vec4(0.0, 0.0, 0.0, 1.0));\n"
4179 		//<< "    const vec4 color = vec4(0.0, 0.0, (float(td.key) / 255.0), 1.0);\n"
4180 		<< "    triangleColor[0] = color;\n"
4181 		<< "}\n"
4182 		;
4183 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
4184 
4185 	const auto& meshCount = m_params->meshCount;
4186 	DE_ASSERT(meshCount.x() == 1u && meshCount.y() == 1u && meshCount.z() == 1u);
4187 	DE_UNREF(meshCount); // For release builds.
4188 
4189 #if 0
4190 #if 0
4191 	// Note: pseudocode, this actually does not compile with glslang.
4192 	std::ostringstream task;
4193 	task
4194 		<< "#version 450\n"
4195 		<< "#extension GL_EXT_mesh_shader : enable\n"
4196 		<< "\n"
4197 		<< "layout(local_size_x=1) in;\n"
4198 		<< "layout(push_constant, std430) uniform PCBlock {\n"
4199 		<< "    uint index;\n"
4200 		<< "} pc;\n"
4201 		<< "struct TaskData {\n"
4202 		<< "    uint key;\n"
4203 		<< "};\n"
4204 		<< "taskPayloadSharedEXT TaskData td0;\n"
4205 		<< "taskPayloadSharedEXT TaskData td1;\n"
4206 		<< "taskPayloadSharedEXT TaskData td2;\n"
4207 		<< "\n"
4208 		<< "void main ()\n"
4209 		<< "{\n"
4210 		<< "    td0.key = " << keys.at(0) << "u;\n"
4211 		<< "    td1.key = " << keys.at(1) << "u;\n"
4212 		<< "    td2.key = " << keys.at(2) << "u;\n"
4213 		<< "    if (pc.index == 0u)      EmitMeshTasksEXT(1u, 1u, 1u, td0);\n"
4214 		<< "    else if (pc.index == 1u) EmitMeshTasksEXT(1u, 1u, 1u, td1);\n"
4215 		<< "    else                     EmitMeshTasksEXT(1u, 1u, 1u, td2);\n"
4216 		<< "}\n"
4217 		;
4218 	programCollection.glslSources.add("task") << glu::TaskSource(task.str());
4219 #else
4220 	// Similar shader to check the setup works.
4221 	std::ostringstream task;
4222 	task
4223 		<< "#version 450\n"
4224 		<< "#extension GL_EXT_mesh_shader : enable\n"
4225 		<< "\n"
4226 		<< "layout(local_size_x=1) in;\n"
4227 		<< "layout(push_constant, std430) uniform PCBlock {\n"
4228 		<< "    uint index;\n"
4229 		<< "} pc;\n"
4230 		<< "struct TaskData {\n"
4231 		<< "    uint key;\n"
4232 		<< "};\n"
4233 		<< "taskPayloadSharedEXT TaskData td;\n"
4234 		<< "\n"
4235 		<< "void main ()\n"
4236 		<< "{\n"
4237 		<< "    if (pc.index == 0u)      td.key = " << keys.at(0) << "u;\n"
4238 		<< "    else if (pc.index == 1u) td.key = " << keys.at(1) << "u;\n"
4239 		<< "    else                     td.key = " << keys.at(2) << "u;\n"
4240 		<< "    EmitMeshTasksEXT(1u, 1u, 1u);\n"
4241 		<< "}\n"
4242 		;
4243 	programCollection.glslSources.add("task") << glu::TaskSource(task.str());
4244 #endif
4245 #else
4246 	std::ostringstream taskSPV;
4247 	taskSPV
4248 		<< "                    OpCapability MeshShadingEXT\n"
4249 		<< "                    OpExtension \"SPV_EXT_mesh_shader\"\n"
4250 		<< "               %1 = OpExtInstImport \"GLSL.std.450\"\n"
4251 		<< "                    OpMemoryModel Logical GLSL450\n"
4252 		<< "                    OpEntryPoint TaskEXT %main \"main\"\n"
4253 		<< "                    OpExecutionMode %main LocalSize 1 1 1\n"
4254 		<< "                    OpMemberDecorate %PCBlock 0 Offset 0\n"
4255 		<< "                    OpDecorate %PCBlock Block\n"
4256 		<< "                    OpDecorate %work_group_size BuiltIn WorkgroupSize\n"
4257 		<< "               %2 = OpTypeVoid\n"
4258 		<< "               %3 = OpTypeFunction %2\n"
4259 		<< "            %uint = OpTypeInt 32 0\n"
4260 		<< "        %TaskData = OpTypeStruct %uint\n"
4261 		<< "    %TaskData_ptr = OpTypePointer TaskPayloadWorkgroupEXT %TaskData\n"
4262 		<< "       %payload_0 = OpVariable %TaskData_ptr TaskPayloadWorkgroupEXT\n"
4263 		<< "       %payload_1 = OpVariable %TaskData_ptr TaskPayloadWorkgroupEXT\n"
4264 		<< "       %payload_2 = OpVariable %TaskData_ptr TaskPayloadWorkgroupEXT\n"
4265 		<< "             %int = OpTypeInt 32 1\n"
4266 		<< "           %int_0 = OpConstant %int 0\n"
4267 		<< "           %key_0 = OpConstant %uint " << keys.at(0) << "\n"
4268 		<< "           %key_1 = OpConstant %uint " << keys.at(1) << "\n"
4269 		<< "           %key_2 = OpConstant %uint " << keys.at(2) << "\n"
4270 		<< "%payload_uint_ptr = OpTypePointer TaskPayloadWorkgroupEXT %uint\n"
4271 		<< "         %PCBlock = OpTypeStruct %uint\n"
4272 		<< "     %PCBlock_ptr = OpTypePointer PushConstant %PCBlock\n"
4273 		<< "              %pc = OpVariable %PCBlock_ptr PushConstant\n"
4274 		<< "     %pc_uint_ptr = OpTypePointer PushConstant %uint\n"
4275 		<< "          %uint_0 = OpConstant %uint 0\n"
4276 		<< "          %uint_1 = OpConstant %uint 1\n"
4277 		<< "            %bool = OpTypeBool\n"
4278 		<< "           %uvec3 = OpTypeVector %uint 3\n"
4279 		<< " %work_group_size = OpConstantComposite %uvec3 %uint_1 %uint_1 %uint_1\n"
4280 		<< "            %main = OpFunction %2 None %3\n"
4281 		<< "               %5 = OpLabel\n"
4282 		<< "   %payload_0_key = OpAccessChain %payload_uint_ptr %payload_0 %int_0\n"
4283 		<< "   %payload_1_key = OpAccessChain %payload_uint_ptr %payload_1 %int_0\n"
4284 		<< "   %payload_2_key = OpAccessChain %payload_uint_ptr %payload_2 %int_0\n"
4285 		<< "                    OpStore %payload_0_key %key_0\n"
4286 		<< "                    OpStore %payload_1_key %key_1\n"
4287 		<< "                    OpStore %payload_2_key %key_2\n"
4288 		<< "    %pc_index_ptr = OpAccessChain %pc_uint_ptr %pc %int_0\n"
4289 		<< "        %pc_index = OpLoad %uint %pc_index_ptr\n"
4290 		<< "              %23 = OpIEqual %bool %pc_index %uint_0\n"
4291 		<< "                    OpSelectionMerge %25 None\n"
4292 		<< "                    OpBranchConditional %23 %24 %27\n"
4293 		<< "              %24 = OpLabel\n"
4294 		<< "                    OpEmitMeshTasksEXT %uint_1 %uint_1 %uint_1 %payload_0\n"
4295 		<< "                    OpBranch %25\n"
4296 		<< "              %27 = OpLabel\n"
4297 		<< "              %30 = OpIEqual %bool %pc_index %uint_1\n"
4298 		<< "                    OpSelectionMerge %32 None\n"
4299 		<< "                    OpBranchConditional %30 %31 %33\n"
4300 		<< "              %31 = OpLabel\n"
4301 		<< "                    OpEmitMeshTasksEXT %uint_1 %uint_1 %uint_1 %payload_1\n"
4302 		<< "                    OpBranch %32\n"
4303 		<< "              %33 = OpLabel\n"
4304 		<< "                    OpEmitMeshTasksEXT %uint_1 %uint_1 %uint_1 %payload_2\n"
4305 		<< "                    OpBranch %32\n"
4306 		<< "              %32 = OpLabel\n"
4307 		<< "                    OpBranch %25\n"
4308 		<< "              %25 = OpLabel\n"
4309 		<< "                    OpReturn\n"
4310 		<< "                    OpFunctionEnd\n"
4311 		;
4312 	programCollection.spirvAsmSources.add("task") << taskSPV.str() << spvBuildOptions;
4313 #endif
4314 }
4315 
generateReferenceLevel()4316 void MultipleTaskPayloadsInstance::generateReferenceLevel ()
4317 {
4318 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
4319 }
4320 
iterate()4321 tcu::TestStatus MultipleTaskPayloadsInstance::iterate ()
4322 {
4323 	const auto&		vkd			= m_context.getDeviceInterface();
4324 	const auto		device		= m_context.getDevice();
4325 	auto&			alloc		= m_context.getDefaultAllocator();
4326 	const auto		queueIndex	= m_context.getUniversalQueueFamilyIndex();
4327 	const auto		queue		= m_context.getUniversalQueue();
4328 
4329 	const auto		imageFormat	= getOutputFormat();
4330 	const auto		tcuFormat	= mapVkFormat(imageFormat);
4331 	const auto		imageExtent	= makeExtent3D(m_params->width, m_params->height, 1u);
4332 	const auto		imageUsage	= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
4333 
4334 	const VkImageCreateInfo colorBufferInfo =
4335 	{
4336 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
4337 		nullptr,								//	const void*				pNext;
4338 		0u,										//	VkImageCreateFlags		flags;
4339 		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
4340 		imageFormat,							//	VkFormat				format;
4341 		imageExtent,							//	VkExtent3D				extent;
4342 		1u,										//	uint32_t				mipLevels;
4343 		1u,										//	uint32_t				arrayLayers;
4344 		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
4345 		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
4346 		imageUsage,								//	VkImageUsageFlags		usage;
4347 		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
4348 		0u,										//	uint32_t				queueFamilyIndexCount;
4349 		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
4350 		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
4351 	};
4352 
4353 	// Create color image and view.
4354 	ImageWithMemory	colorImage	(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
4355 	const auto		colorSRR	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
4356 	const auto		colorSRL	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
4357 	const auto		colorView	= makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
4358 
4359 	// Create a memory buffer for verification.
4360 	const auto			verificationBufferSize	= static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
4361 	const auto			verificationBufferUsage	= (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
4362 	const auto			verificationBufferInfo	= makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
4363 
4364 	BufferWithMemory	verificationBuffer		(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
4365 	auto&				verificationBufferAlloc	= verificationBuffer.getAllocation();
4366 	void*				verificationBufferData	= verificationBufferAlloc.getHostPtr();
4367 
4368 	// Pipeline layout.
4369 	const auto pcSize			= static_cast<uint32_t>(sizeof(uint32_t));
4370 	const auto pcRange			= makePushConstantRange(VK_SHADER_STAGE_TASK_BIT_EXT, 0u, pcSize);
4371 	const auto pipelineLayout	= makePipelineLayout(vkd, device, DE_NULL, &pcRange);
4372 
4373 	// Shader modules.
4374 	const auto&	binaries	= m_context.getBinaryCollection();
4375 	const auto	hasTask		= binaries.contains("task");
4376 
4377 	const auto	meshShader	= createShaderModule(vkd, device, binaries.get("mesh"));
4378 	const auto	fragShader	= createShaderModule(vkd, device, binaries.get("frag"));
4379 
4380 	Move<VkShaderModule> taskShader;
4381 	if (hasTask)
4382 		taskShader = createShaderModule(vkd, device, binaries.get("task"));
4383 
4384 	// Render pass.
4385 	const auto renderPass = makeRenderPass(vkd, device, imageFormat);
4386 
4387 	// Framebuffer.
4388 	const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);
4389 
4390 	// Viewport and scissor.
4391 	const std::vector<VkViewport>	viewports	(1u, makeViewport(imageExtent));
4392 	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(imageExtent));
4393 
4394 	// Color blending.
4395 	const auto									colorWriteMask	= (VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT);
4396 	const VkPipelineColorBlendAttachmentState	blendAttState	=
4397 	{
4398 		VK_TRUE,				//	VkBool32				blendEnable;
4399 		VK_BLEND_FACTOR_ONE,	//	VkBlendFactor			srcColorBlendFactor;
4400 		VK_BLEND_FACTOR_ONE,	//	VkBlendFactor			dstColorBlendFactor;
4401 		VK_BLEND_OP_ADD,		//	VkBlendOp				colorBlendOp;
4402 		VK_BLEND_FACTOR_ONE,	//	VkBlendFactor			srcAlphaBlendFactor;
4403 		VK_BLEND_FACTOR_ONE,	//	VkBlendFactor			dstAlphaBlendFactor;
4404 		VK_BLEND_OP_ADD,		//	VkBlendOp				alphaBlendOp;
4405 		colorWriteMask,			//	VkColorComponentFlags	colorWriteMask;
4406 	};
4407 
4408 	const VkPipelineColorBlendStateCreateInfo colorBlendInfo =
4409 	{
4410 		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,	//	VkStructureType								sType;
4411 		nullptr,													//	const void*									pNext;
4412 		0u,															//	VkPipelineColorBlendStateCreateFlags		flags;
4413 		VK_FALSE,													//	VkBool32									logicOpEnable;
4414 		VK_LOGIC_OP_OR,												//	VkLogicOp									logicOp;
4415 		1u,															//	uint32_t									attachmentCount;
4416 		&blendAttState,												//	const VkPipelineColorBlendAttachmentState*	pAttachments;
4417 		{ 0.0f, 0.0f, 0.0f, 0.0f },									//	float										blendConstants[4];
4418 	};
4419 
4420 	const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
4421 		taskShader.get(), meshShader.get(), fragShader.get(),
4422 		renderPass.get(), viewports, scissors, 0u/*subpass*/,
4423 		nullptr, nullptr, nullptr, &colorBlendInfo);
4424 
4425 	// Command pool and buffer.
4426 	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
4427 	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4428 	const auto cmdBuffer	= cmdBufferPtr.get();
4429 
4430 	beginCommandBuffer(vkd, cmdBuffer);
4431 
4432 	// Run pipeline.
4433 	const tcu::Vec4	clearColor	(0.0f, 0.0f, 0.0f, 0.0f);
4434 	const auto		drawCount	= m_params->drawCount();
4435 	const uint32_t	pcData		= MultipleTaskPayloadsCase::kGoodKeyIdx;
4436 	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
4437 	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
4438 	vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), VK_SHADER_STAGE_TASK_BIT_EXT, 0u, pcSize, &pcData);
4439 	vkd.cmdDrawMeshTasksEXT(cmdBuffer, drawCount.x(), drawCount.y(), drawCount.z());
4440 	endRenderPass(vkd, cmdBuffer);
4441 
4442 	// Copy color buffer to verification buffer.
4443 	const auto colorAccess		= (VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT);
4444 	const auto transferRead		= VK_ACCESS_TRANSFER_READ_BIT;
4445 	const auto transferWrite	= VK_ACCESS_TRANSFER_WRITE_BIT;
4446 	const auto hostRead			= VK_ACCESS_HOST_READ_BIT;
4447 
4448 	const auto preCopyBarrier	= makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
4449 	const auto postCopyBarrier	= makeMemoryBarrier(transferWrite, hostRead);
4450 	const auto copyRegion		= makeBufferImageCopy(imageExtent, colorSRL);
4451 
4452 	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
4453 	vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, verificationBuffer.get(), 1u, &copyRegion);
4454 	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postCopyBarrier, 0u, nullptr, 0u, nullptr);
4455 
4456 	endCommandBuffer(vkd, cmdBuffer);
4457 	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
4458 
4459 	// Generate reference image and compare results.
4460 	const tcu::IVec3					iExtent				(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
4461 	const tcu::ConstPixelBufferAccess	verificationAccess	(tcuFormat, iExtent, verificationBufferData);
4462 
4463 	generateReferenceLevel();
4464 	invalidateAlloc(vkd, device, verificationBufferAlloc);
4465 	if (!verifyResult(verificationAccess))
4466 		TCU_FAIL("Result does not match reference; check log for details");
4467 
4468 	return tcu::TestStatus::pass("Pass");
4469 }
4470 
4471 // Test multiple task/mesh draw calls and updating push constants and descriptors in between. We will divide the output image in 4
4472 // quadrants, and use each task/mesh draw call to draw on a particular quadrant. The output color in each quadrant will be composed
4473 // of data from different sources: storage buffer, sampled image or push constant value, and those will change before each draw
4474 // call. We'll prepare different descriptors for each quadrant.
4475 class RebindSetsCase : public MeshShaderMiscCase
4476 {
4477 public:
RebindSetsCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)4478 					RebindSetsCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
4479 						: MeshShaderMiscCase(testCtx, name, description, std::move(params))
4480 						{
4481 							const auto drawCount = m_params->drawCount();
4482 							DE_ASSERT(drawCount.x() == 1u && drawCount.y() == 1u && drawCount.z() == 1u);
4483 							DE_UNREF(drawCount); // For release builds.
4484 						}
~RebindSetsCase(void)4485 	virtual			~RebindSetsCase		(void) {}
4486 
4487 	TestInstance*	createInstance		(Context& context) const override;
4488 	void			checkSupport		(Context& context) const override;
4489 	void			initPrograms		(vk::SourceCollections& programCollection) const override;
4490 };
4491 
4492 class RebindSetsInstance : public MeshShaderMiscInstance
4493 {
4494 public:
RebindSetsInstance(Context & context,const MiscTestParams * params)4495 						RebindSetsInstance		(Context& context, const MiscTestParams* params)
4496 							: MeshShaderMiscInstance(context, params) {}
~RebindSetsInstance(void)4497 	virtual				~RebindSetsInstance		(void) {}
4498 
4499 	void				generateReferenceLevel	() override;
4500 	tcu::TestStatus		iterate					(void) override;
4501 
4502 protected:
4503 	struct QuadrantInfo
4504 	{
4505 		// Offsets in framebuffer coordinates (0 to 2, final coordinates in range -1 to 1)
4506 		float		offsetX;
4507 		float		offsetY;
4508 		tcu::Vec4	color;
4509 
QuadrantInfovkt::MeshShader::__anonb99b7fd80111::RebindSetsInstance::QuadrantInfo4510 		QuadrantInfo (float offsetX_, float offsetY_, float red, float green, float blue)
4511 			: offsetX	(offsetX_)
4512 			, offsetY	(offsetY_)
4513 			, color		(red, green, blue, 1.0f)
4514 		{}
4515 	};
4516 
getQuadrantInfos()4517 	static std::vector<QuadrantInfo> getQuadrantInfos ()
4518 	{
4519 		std::vector<QuadrantInfo> infos;
4520 		infos.reserve(4u);
4521 
4522 		//                 offsets     rgb
4523 		infos.emplace_back(0.0f, 0.0f, 1.0f, 0.0f, 1.0f);
4524 		infos.emplace_back(1.0f, 0.0f, 1.0f, 1.0f, 0.0f);
4525 		infos.emplace_back(0.0f, 1.0f, 0.0f, 0.0f, 1.0f);
4526 		infos.emplace_back(1.0f, 1.0f, 0.0f, 1.0f, 1.0f);
4527 
4528 		return infos;
4529 	}
4530 
4531 	struct PushConstants
4532 	{
4533 		float offsetX;
4534 		float offsetY;
4535 		float blueComponent;
4536 	};
4537 };
4538 
createInstance(Context & context) const4539 TestInstance* RebindSetsCase::createInstance (Context &context) const
4540 {
4541 	return new RebindSetsInstance(context, m_params.get());
4542 }
4543 
checkSupport(Context & context) const4544 void RebindSetsCase::checkSupport (Context& context) const
4545 {
4546 	genericCheckSupport(context, true, false);
4547 }
4548 
initPrograms(vk::SourceCollections & programCollection) const4549 void RebindSetsCase::initPrograms (vk::SourceCollections& programCollection) const
4550 {
4551 	const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
4552 
4553 	// Generic fragment shader.
4554 	MeshShaderMiscCase::initPrograms(programCollection);
4555 
4556 	const std::string ssbo		= "layout (set=0, binding=0, std430) readonly buffer SSBOBlock { float redComponent; } ssbo;\n";
4557 	const std::string combined	= "layout (set=0, binding=1) uniform sampler2D greenComponent;\n";
4558 	const std::string pc		= "layout (push_constant, std430) uniform PCBlock { float offsetX; float offsetY; float blueComponent; } pc;\n";
4559 	const std::string payload	= "struct TaskData { float redComponent; }; taskPayloadSharedEXT TaskData td;\n";
4560 
4561 	std::ostringstream task;
4562 	task
4563 		<< "#version 450\n"
4564 		<< "#extension GL_EXT_mesh_shader : enable\n"
4565 		<< "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
4566 		<< "\n"
4567 		<< ssbo
4568 		<< payload
4569 		<< "\n"
4570 		<< "void main (void)\n"
4571 		<< "{\n"
4572 		<< "    td.redComponent = ssbo.redComponent;\n"
4573 		<< "    EmitMeshTasksEXT(1u, 1u, 1u);\n"
4574 		<< "}\n"
4575 		;
4576 	programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
4577 
4578 	std::ostringstream mesh;
4579 	mesh
4580 		<< "#version 450\n"
4581 		<< "#extension GL_EXT_mesh_shader : enable\n"
4582 		<< "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
4583 		<< "layout (triangles) out;\n"
4584 		<< "layout (max_vertices=4, max_primitives=2) out;\n"
4585 		<< "\n"
4586 		<< combined
4587 		<< pc
4588 		<< payload
4589 		<< "layout (location=0) out perprimitiveEXT vec4 primitiveColor[];\n"
4590 		<< "\n"
4591 		<< "void main (void)\n"
4592 		<< "{\n"
4593 		<< "    SetMeshOutputsEXT(4u, 2u);\n"
4594 		<< "\n"
4595 		<< "    gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0 + pc.offsetX, -1.0 + pc.offsetY, 0.0, 1.0);\n"
4596 		<< "    gl_MeshVerticesEXT[1].gl_Position = vec4( 0.0 + pc.offsetX, -1.0 + pc.offsetY, 0.0, 1.0);\n"
4597 		<< "    gl_MeshVerticesEXT[2].gl_Position = vec4(-1.0 + pc.offsetX,  0.0 + pc.offsetY, 0.0, 1.0);\n"
4598 		<< "    gl_MeshVerticesEXT[3].gl_Position = vec4( 0.0 + pc.offsetX,  0.0 + pc.offsetY, 0.0, 1.0);\n"
4599 		<< "\n"
4600 		<< "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(2u, 1u, 0u);\n"
4601 		<< "    gl_PrimitiveTriangleIndicesEXT[1] = uvec3(2u, 3u, 1u);\n"
4602 		<< "\n"
4603 		<< "    const vec4 primColor = vec4(td.redComponent, texture(greenComponent, vec2(0.5, 0.5)).x, pc.blueComponent, 1.0);\n"
4604 		<< "    primitiveColor[0] = primColor;\n"
4605 		<< "    primitiveColor[1] = primColor;\n"
4606 		<< "}\n"
4607 		;
4608 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
4609 }
4610 
generateReferenceLevel()4611 void RebindSetsInstance::generateReferenceLevel ()
4612 {
4613 	const auto iWidth	= static_cast<int>(m_params->width);
4614 	const auto iHeight	= static_cast<int>(m_params->height);
4615 	const auto fWidth	= static_cast<float>(iWidth);
4616 	const auto fHeight	= static_cast<float>(iHeight);
4617 
4618 	DE_ASSERT(iWidth % 2 == 0);
4619 	DE_ASSERT(iHeight % 2 == 0);
4620 
4621 	const auto halfWidth	= iWidth / 2;
4622 	const auto halfHeight	= iHeight / 2;
4623 
4624 	const auto format		= getOutputFormat();
4625 	const auto tcuFormat	= mapVkFormat(format);
4626 
4627 	m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
4628 	const auto access = m_referenceLevel->getAccess();
4629 
4630 	const auto quadrantInfos = getQuadrantInfos();
4631 	DE_ASSERT(quadrantInfos.size() == 4u);
4632 
4633 	for (const auto& quadrantInfo : quadrantInfos)
4634 	{
4635 		const auto xCorner		= static_cast<int>(quadrantInfo.offsetX / 2.0f * fWidth);
4636 		const auto yCorner		= static_cast<int>(quadrantInfo.offsetY / 2.0f * fHeight);
4637 		const auto subregion	= tcu::getSubregion(access, xCorner, yCorner, halfWidth, halfHeight);
4638 
4639 		tcu::clear(subregion, quadrantInfo.color);
4640 	}
4641 }
4642 
iterate(void)4643 tcu::TestStatus RebindSetsInstance::iterate (void)
4644 {
4645 	const auto&			vkd				= m_context.getDeviceInterface();
4646 	const auto			device			= m_context.getDevice();
4647 	auto&				alloc			= m_context.getDefaultAllocator();
4648 	const auto			queueIndex		= m_context.getUniversalQueueFamilyIndex();
4649 	const auto			queue			= m_context.getUniversalQueue();
4650 	const auto			quadrantInfos	= getQuadrantInfos();
4651 	const auto			setCount		= static_cast<uint32_t>(quadrantInfos.size());
4652 	const auto			textureExtent	= makeExtent3D(1u, 1u, 1u);
4653 	const tcu::IVec3	iTexExtent		(static_cast<int>(textureExtent.width), static_cast<int>(textureExtent.height), static_cast<int>(textureExtent.depth));
4654 	const auto			textureFormat	= VK_FORMAT_R8G8B8A8_UNORM;
4655 	const auto			tcuTexFormat	= mapVkFormat(textureFormat);
4656 	const auto			textureUsage	= (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
4657 	const auto			colorExtent		= makeExtent3D(m_params->width, m_params->height, 1u);
4658 	const auto			colorFormat		= getOutputFormat();
4659 	const auto			tcuColorFormat	= mapVkFormat(colorFormat);
4660 	const auto			colorUsage		= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
4661 
4662 	DE_ASSERT(quadrantInfos.size() == 4u);
4663 
4664 	// We need 4 descriptor sets: 4 buffers, 4 images and 1 sampler.
4665 	const VkSamplerCreateInfo	samplerCreateInfo	= initVulkanStructure();
4666 	const auto					sampler				= createSampler(vkd, device, &samplerCreateInfo);
4667 
4668 	// Buffers.
4669 	const auto					ssboSize			= static_cast<VkDeviceSize>(sizeof(float));
4670 	const auto					ssboCreateInfo		= makeBufferCreateInfo(ssboSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4671 
4672 	std::vector<std::unique_ptr<BufferWithMemory>> ssbos;
4673 	ssbos.reserve(quadrantInfos.size());
4674 	for (const auto& quadrantInfo : quadrantInfos)
4675 	{
4676 		ssbos.emplace_back(new BufferWithMemory(vkd, device, alloc, ssboCreateInfo, MemoryRequirement::HostVisible));
4677 		void* data = ssbos.back()->getAllocation().getHostPtr();
4678 		const auto redComponent = quadrantInfo.color.x();
4679 		deMemcpy(data, &redComponent, sizeof(redComponent));
4680 	}
4681 
4682 	// Textures.
4683 	const VkImageCreateInfo textureCreateInfo =
4684 	{
4685 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
4686 		nullptr,								//	const void*				pNext;
4687 		0u,										//	VkImageCreateFlags		flags;
4688 		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
4689 		textureFormat,							//	VkFormat				format;
4690 		textureExtent,							//	VkExtent3D				extent;
4691 		1u,										//	uint32_t				mipLevels;
4692 		1u,										//	uint32_t				arrayLayers;
4693 		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
4694 		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
4695 		textureUsage,							//	VkImageUsageFlags		usage;
4696 		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
4697 		0u,										//	uint32_t				queueFamilyIndexCount;
4698 		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
4699 		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
4700 	};
4701 	const auto textureSRR			= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
4702 	const auto textureSRL			= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
4703 	const auto textureCopyRegion	= makeBufferImageCopy(textureExtent, textureSRL);
4704 
4705 	std::vector<std::unique_ptr<ImageWithMemory>> textures;
4706 	for (size_t i = 0u; i < quadrantInfos.size(); ++i)
4707 		textures.emplace_back(new ImageWithMemory(vkd, device, alloc, textureCreateInfo, MemoryRequirement::Any));
4708 
4709 	std::vector<Move<VkImageView>> textureViews;
4710 	textureViews.reserve(quadrantInfos.size());
4711 	for (const auto& texture : textures)
4712 		textureViews.push_back(makeImageView(vkd, device, texture->get(), VK_IMAGE_VIEW_TYPE_2D, textureFormat, textureSRR));
4713 
4714 	// Auxiliar buffers to fill the images with the right colors.
4715 	const auto pixelSize				= tcu::getPixelSize(tcuTexFormat);
4716 	const auto pixelCount				= textureExtent.width * textureExtent.height * textureExtent.depth;
4717 	const auto auxiliarBufferSize		= static_cast<VkDeviceSize>(static_cast<VkDeviceSize>(pixelSize) * static_cast<VkDeviceSize>(pixelCount));
4718 	const auto auxiliarBufferCreateInfo	= makeBufferCreateInfo(auxiliarBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
4719 
4720 	std::vector<std::unique_ptr<BufferWithMemory>> auxiliarBuffers;
4721 	auxiliarBuffers.reserve(quadrantInfos.size());
4722 	for (const auto& quadrantInfo : quadrantInfos)
4723 	{
4724 		auxiliarBuffers.emplace_back(new BufferWithMemory(vkd, device, alloc, auxiliarBufferCreateInfo, MemoryRequirement::HostVisible));
4725 
4726 		void*					data			= auxiliarBuffers.back()->getAllocation().getHostPtr();
4727 		tcu::PixelBufferAccess	access			(tcuTexFormat, iTexExtent, data);
4728 		const tcu::Vec4			quadrantColor	(quadrantInfo.color.y(), 0.0f, 0.0f, 1.0f);
4729 
4730 		tcu::clear(access, quadrantColor);
4731 	}
4732 
4733 	// Descriptor set layout.
4734 	DescriptorSetLayoutBuilder layoutBuilder;
4735 	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_TASK_BIT_EXT);
4736 	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_SHADER_STAGE_MESH_BIT_EXT);
4737 	const auto setLayout = layoutBuilder.build(vkd, device);
4738 
4739 	// Pipeline layout.
4740 	const auto pcSize			= static_cast<uint32_t>(sizeof(PushConstants));
4741 	const auto pcRange			= makePushConstantRange(VK_SHADER_STAGE_MESH_BIT_EXT, 0u, pcSize);
4742 	const auto pipelineLayout	= makePipelineLayout(vkd, device, setLayout.get(), &pcRange);
4743 
4744 	// Descriptor pool and sets.
4745 	DescriptorPoolBuilder poolBuilder;
4746 	poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, setCount);
4747 	poolBuilder.addType(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, setCount);
4748 	const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, setCount);
4749 
4750 	std::vector<Move<VkDescriptorSet>> descriptorSets;
4751 	for (size_t i = 0; i < quadrantInfos.size(); ++i)
4752 		descriptorSets.push_back(makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get()));
4753 
4754 	// Update descriptor sets.
4755 	DescriptorSetUpdateBuilder updateBuilder;
4756 	for (size_t i = 0; i < descriptorSets.size(); ++i)
4757 	{
4758 		const auto&	descriptorSet	= descriptorSets.at(i);
4759 		const auto&	ssbo			= ssbos.at(i);
4760 		const auto&	textureView		= textureViews.at(i);
4761 		const auto	descBufferInfo	= makeDescriptorBufferInfo(ssbo->get(), 0ull, ssboSize);
4762 		const auto	descImageInfo	= makeDescriptorImageInfo(sampler.get(), textureView.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
4763 
4764 		updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descBufferInfo);
4765 		updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &descImageInfo);
4766 	}
4767 	updateBuilder.update(vkd, device);
4768 
4769 	// Color attachment.
4770 	const VkImageCreateInfo colorCreateInfo =
4771 	{
4772 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
4773 		nullptr,								//	const void*				pNext;
4774 		0u,										//	VkImageCreateFlags		flags;
4775 		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
4776 		colorFormat,							//	VkFormat				format;
4777 		colorExtent,							//	VkExtent3D				extent;
4778 		1u,										//	uint32_t				mipLevels;
4779 		1u,										//	uint32_t				arrayLayers;
4780 		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
4781 		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
4782 		colorUsage,								//	VkImageUsageFlags		usage;
4783 		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
4784 		0u,										//	uint32_t				queueFamilyIndexCount;
4785 		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
4786 		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
4787 	};
4788 	const auto colorSRR = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
4789 	const auto colorSRL = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
4790 
4791 	ImageWithMemory	colorAttachment	(vkd, device, alloc, colorCreateInfo, MemoryRequirement::Any);
4792 	const auto		colorView		= makeImageView(vkd, device, colorAttachment.get(), VK_IMAGE_VIEW_TYPE_2D, colorFormat, colorSRR);
4793 
4794 	// Create a memory buffer for verification.
4795 	const auto			verificationBufferSize	= static_cast<VkDeviceSize>(colorExtent.width * colorExtent.height * tcu::getPixelSize(tcuColorFormat));
4796 	const auto			verificationBufferUsage	= (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
4797 	const auto			verificationBufferInfo	= makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
4798 
4799 	BufferWithMemory	verificationBuffer		(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
4800 	auto&				verificationBufferAlloc	= verificationBuffer.getAllocation();
4801 	void*				verificationBufferData	= verificationBufferAlloc.getHostPtr();
4802 
4803 	// Render pass and framebuffer.
4804 	const auto renderPass	= makeRenderPass(vkd, device, colorFormat);
4805 	const auto framebuffer	= makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), colorExtent.width, colorExtent.height);
4806 
4807 	const std::vector<VkViewport>	viewports	(1u, makeViewport(colorExtent));
4808 	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(colorExtent));
4809 
4810 	// Shader modules and pipeline.
4811 	const auto&	binaries	= m_context.getBinaryCollection();
4812 	const auto	taskShader	= createShaderModule(vkd, device, binaries.get("task"));
4813 	const auto	meshShader	= createShaderModule(vkd, device, binaries.get("mesh"));
4814 	const auto	fragShader	= createShaderModule(vkd, device, binaries.get("frag"));
4815 	const auto	pipeline	= makeGraphicsPipeline(
4816 		vkd, device, pipelineLayout.get(),
4817 		taskShader.get(), meshShader.get(), fragShader.get(),
4818 		renderPass.get(), viewports, scissors);
4819 
4820 	// Command pool and buffer.
4821 	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
4822 	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4823 	const auto cmdBuffer	= cmdBufferPtr.get();
4824 
4825 	beginCommandBuffer(vkd, cmdBuffer);
4826 
4827 	// Copy data from auxiliar buffers to textures.
4828 	for (const auto& texture : textures)
4829 	{
4830 		const auto prepareTextureForCopy = makeImageMemoryBarrier(
4831 			0u, VK_ACCESS_TRANSFER_WRITE_BIT,
4832 			VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
4833 			texture->get(), textureSRR);
4834 		cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &prepareTextureForCopy);
4835 	}
4836 
4837 	for (size_t i = 0; i < auxiliarBuffers.size(); ++i)
4838 	{
4839 		const auto& auxBuffer	= auxiliarBuffers.at(i);
4840 		const auto& texture		= textures.at(i);
4841 		vkd.cmdCopyBufferToImage(cmdBuffer, auxBuffer->get(), texture->get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1u, &textureCopyRegion);
4842 	}
4843 
4844 	// Prepare textures for sampling.
4845 	for (const auto& texture : textures)
4846 	{
4847 		const auto prepareTextureForSampling = makeImageMemoryBarrier(
4848 			VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
4849 			VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
4850 			texture->get(), textureSRR);
4851 		cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT, &prepareTextureForSampling);
4852 	}
4853 
4854 	// Render stuff.
4855 	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f));
4856 	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
4857 
4858 	const auto drawCount = m_params->drawCount();
4859 	for (size_t i = 0; i < quadrantInfos.size(); ++i)
4860 	{
4861 		const auto& quadrantInfo = quadrantInfos.at(i);
4862 		const auto& descriptorSet = descriptorSets.at(i);
4863 
4864 		PushConstants pcData;
4865 		pcData.blueComponent = quadrantInfo.color.z();
4866 		pcData.offsetX = quadrantInfo.offsetX;
4867 		pcData.offsetY = quadrantInfo.offsetY;
4868 
4869 		vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
4870 		vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), VK_SHADER_STAGE_MESH_BIT_EXT, 0u, pcSize, &pcData);
4871 		vkd.cmdDrawMeshTasksEXT(cmdBuffer, drawCount.x(), drawCount.y(), drawCount.z());
4872 	}
4873 
4874 	endRenderPass(vkd, cmdBuffer);
4875 
4876 	// Copy color attachment to verification buffer.
4877 	const auto preCopyBarrier	= makeImageMemoryBarrier(
4878 		VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
4879 		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
4880 		colorAttachment.get(), colorSRR);
4881 	const auto postCopyBarrier	= makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
4882 	const auto copyRegion		= makeBufferImageCopy(colorExtent, colorSRL);
4883 
4884 	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
4885 	vkd.cmdCopyImageToBuffer(cmdBuffer, colorAttachment.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, verificationBuffer.get(), 1u, &copyRegion);
4886 	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postCopyBarrier, 0u, nullptr, 0u, nullptr);
4887 
4888 	endCommandBuffer(vkd, cmdBuffer);
4889 	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
4890 
4891 	// Generate reference image and compare results.
4892 	const tcu::IVec3					iColorExtent		(static_cast<int>(colorExtent.width), static_cast<int>(colorExtent.height), 1);
4893 	const tcu::ConstPixelBufferAccess	verificationAccess	(tcuColorFormat, iColorExtent, verificationBufferData);
4894 
4895 	generateReferenceLevel();
4896 	invalidateAlloc(vkd, device, verificationBufferAlloc);
4897 	if (!verifyResult(verificationAccess))
4898 		TCU_FAIL("Result does not match reference; check log for details");
4899 
4900 	return tcu::TestStatus::pass("Pass");
4901 }
4902 
4903 } // anonymous namespace
4904 
createMeshShaderMiscTestsEXT(tcu::TestContext & testCtx)4905 tcu::TestCaseGroup* createMeshShaderMiscTestsEXT (tcu::TestContext& testCtx)
4906 {
4907 	GroupPtr miscTests (new tcu::TestCaseGroup(testCtx, "misc", "Mesh Shader Misc Tests"));
4908 
4909 	{
4910 		ParamsPtr paramsPtr (new MiscTestParams(
4911 			/*taskCount*/	tcu::just(tcu::UVec3(2u, 1u, 1u)),
4912 			/*meshCount*/	tcu::UVec3(2u, 1u, 1u),
4913 			/*width*/		8u,
4914 			/*height*/		8u));
4915 
4916 		miscTests->addChild(new ComplexTaskDataCase(testCtx, "complex_task_data", "Pass a complex structure from the task to the mesh shader", std::move(paramsPtr)));
4917 	}
4918 
4919 	{
4920 		ParamsPtr paramsPtr (new MiscTestParams(
4921 			/*taskCount*/	tcu::Nothing,
4922 			/*meshCount*/	tcu::UVec3(1u, 1u, 1u),
4923 			/*width*/		5u,		// Use an odd value so there's a pixel in the exact center.
4924 			/*height*/		7u));	// Idem.
4925 
4926 		miscTests->addChild(new SinglePointCase(testCtx, "single_point", "Draw a single point", std::move(paramsPtr)));
4927 	}
4928 
4929 	{
4930 		ParamsPtr paramsPtr (new MiscTestParams(
4931 			/*taskCount*/	tcu::Nothing,
4932 			/*meshCount*/	tcu::UVec3(1u, 1u, 1u),
4933 			/*width*/		8u,
4934 			/*height*/		5u));	// Use an odd value so there's a center line.
4935 
4936 		miscTests->addChild(new SingleLineCase(testCtx, "single_line", "Draw a single line", std::move(paramsPtr)));
4937 	}
4938 
4939 	{
4940 		ParamsPtr paramsPtr (new MiscTestParams(
4941 			/*taskCount*/	tcu::Nothing,
4942 			/*meshCount*/	tcu::UVec3(1u, 1u, 1u),
4943 			/*width*/		5u,	// Use an odd value so there's a pixel in the exact center.
4944 			/*height*/		7u));	// Idem.
4945 
4946 		miscTests->addChild(new SingleTriangleCase(testCtx, "single_triangle", "Draw a single triangle", std::move(paramsPtr)));
4947 	}
4948 
4949 	{
4950 		ParamsPtr paramsPtr (new MiscTestParams(
4951 			/*taskCount*/	tcu::Nothing,
4952 			/*meshCount*/	tcu::UVec3(1u, 1u, 1u),
4953 			/*width*/		16u,
4954 			/*height*/		16u));
4955 
4956 		miscTests->addChild(new MaxPointsCase(testCtx, "max_points", "Draw the maximum number of points", std::move(paramsPtr)));
4957 	}
4958 
4959 	{
4960 		ParamsPtr paramsPtr (new MiscTestParams(
4961 			/*taskCount*/	tcu::Nothing,
4962 			/*meshCount*/	tcu::UVec3(1u, 1u, 1u),
4963 			/*width*/		1u,
4964 			/*height*/		1020u));
4965 
4966 		miscTests->addChild(new MaxLinesCase(testCtx, "max_lines", "Draw the maximum number of lines", std::move(paramsPtr)));
4967 	}
4968 
4969 	{
4970 		const tcu::UVec3 localSizes[] =
4971 		{
4972 			tcu::UVec3(2u, 4u, 8u),
4973 			tcu::UVec3(4u, 2u, 4u),
4974 			tcu::UVec3(2u, 2u, 4u),
4975 		};
4976 
4977 		for (const auto& localSize : localSizes)
4978 		{
4979 			const auto workGroupSize	= (localSize.x() * localSize.y() * localSize.z());
4980 			const auto wgsStr			= std::to_string(workGroupSize);
4981 			const auto testName			= "max_triangles_workgroupsize_" + wgsStr;
4982 			const auto testDesc			= "Draw the maximum number of triangles using a work group size of " + wgsStr;
4983 
4984 			ParamsPtr paramsPtr (new MaxTrianglesCase::Params(
4985 				/*meshCount*/	tcu::UVec3(1u, 1u, 1u),
4986 				/*width*/		512u,
4987 				/*height*/		512u,
4988 				/*localSize*/	localSize));
4989 
4990 			miscTests->addChild(new MaxTrianglesCase(testCtx, testName, testDesc, std::move(paramsPtr)));
4991 		}
4992 	}
4993 
4994 	using LargeWorkGroupParamsPtr = std::unique_ptr<LargeWorkGroupParams>;
4995 	const int dimensionCases[] = { 0, 1, 2 };
4996 
4997 	for (const auto& dim : dimensionCases)
4998 	{
4999 		const auto dimChar = dimSuffix(dim);
5000 
5001 		{
5002 			tcu::UVec3 taskCount (8u, 8u, 8u);
5003 			taskCount[dim] = 65535u;
5004 
5005 			LargeWorkGroupParamsPtr lwgParamsPtr	(new LargeWorkGroupParams(
5006 				/*taskCount*/						tcu::just(taskCount),
5007 				/*meshCount*/						tcu::UVec3(1u, 1u, 1u),
5008 				/*width*/							2040u,
5009 				/*height*/							2056u,
5010 				/*localInvocations*/				tcu::UVec3(1u, 1u, 1u)));
5011 
5012 			ParamsPtr paramsPtr (lwgParamsPtr.release());
5013 
5014 			const auto name = std::string("many_task_work_groups_") + dimChar;
5015 			const auto desc = std::string("Generate a large number of task work groups in the ") + dimChar + " dimension";
5016 
5017 			miscTests->addChild(new LargeWorkGroupCase(testCtx, name, desc, std::move(paramsPtr)));
5018 		}
5019 
5020 		{
5021 			tcu::UVec3 meshCount (8u, 8u, 8u);
5022 			meshCount[dim] = 65535u;
5023 
5024 			LargeWorkGroupParamsPtr lwgParamsPtr	(new LargeWorkGroupParams(
5025 				/*taskCount*/						tcu::Nothing,
5026 				/*meshCount*/						meshCount,
5027 				/*width*/							2040u,
5028 				/*height*/							2056u,
5029 				/*localInvocations*/				tcu::UVec3(1u, 1u, 1u)));
5030 
5031 			ParamsPtr paramsPtr (lwgParamsPtr.release());
5032 
5033 			const auto name = std::string("many_mesh_work_groups_") + dimChar;
5034 			const auto desc = std::string("Generate a large number of mesh work groups in the ") + dimChar + " dimension";
5035 
5036 			miscTests->addChild(new LargeWorkGroupCase(testCtx, name, desc, std::move(paramsPtr)));
5037 		}
5038 
5039 		{
5040 			tcu::UVec3 meshCount (1u, 1u, 1u);
5041 			tcu::UVec3 taskCount (1u, 1u, 1u);
5042 			tcu::UVec3 localInvs (1u, 1u, 1u);
5043 
5044 			meshCount[dim] = 256u;
5045 			taskCount[dim] = 128u;
5046 			localInvs[dim] = 128u;
5047 
5048 			LargeWorkGroupParamsPtr lwgParamsPtr	(new LargeWorkGroupParams(
5049 				/*taskCount*/						tcu::just(taskCount),
5050 				/*meshCount*/						meshCount,
5051 				/*width*/							2048u,
5052 				/*height*/							2048u,
5053 				/*localInvocations*/				localInvs));
5054 
5055 			ParamsPtr paramsPtr (lwgParamsPtr.release());
5056 
5057 			const auto name = std::string("many_task_mesh_work_groups_") + dimChar;
5058 			const auto desc = std::string("Generate a large number of task and mesh work groups in the ") + dimChar + " dimension";
5059 
5060 			miscTests->addChild(new LargeWorkGroupCase(testCtx, name, desc, std::move(paramsPtr)));
5061 		}
5062 	}
5063 
5064 	{
5065 		const PrimitiveType types[] = {
5066 			PrimitiveType::POINTS,
5067 			PrimitiveType::LINES,
5068 			PrimitiveType::TRIANGLES,
5069 		};
5070 
5071 		for (int i = 0; i < 2; ++i)
5072 		{
5073 			const bool extraWrites = (i > 0);
5074 
5075 			// XXX Is this test legal? [https://gitlab.khronos.org/GLSL/GLSL/-/merge_requests/77#note_348252]
5076 			if (extraWrites)
5077 				continue;
5078 
5079 			for (const auto primType : types)
5080 			{
5081 				std::unique_ptr<NoPrimitivesParams> params	(new NoPrimitivesParams(
5082 					/*taskCount*/							(extraWrites ? tcu::just(tcu::UVec3(1u, 1u, 1u)) : tcu::Nothing),
5083 					/*meshCount*/							tcu::UVec3(1u, 1u, 1u),
5084 					/*width*/								16u,
5085 					/*height*/								16u,
5086 					/*primitiveType*/						primType));
5087 
5088 				ParamsPtr			paramsPtr	(params.release());
5089 				const auto			primName	= primitiveTypeName(primType);
5090 				const std::string	name		= "no_" + primName + (extraWrites ? "_extra_writes" : "");
5091 				const std::string	desc		= "Run a pipeline that generates no " + primName + (extraWrites ? " but generates primitive data" : "");
5092 
5093 				miscTests->addChild(extraWrites
5094 					? (new NoPrimitivesExtraWritesCase(testCtx, name, desc, std::move(paramsPtr)))
5095 					: (new NoPrimitivesCase(testCtx, name, desc, std::move(paramsPtr))));
5096 			}
5097 		}
5098 	}
5099 
5100 	{
5101 		for (int i = 0; i < 2; ++i)
5102 		{
5103 			const bool useTaskShader = (i == 0);
5104 
5105 			ParamsPtr paramsPtr (new MiscTestParams(
5106 				/*taskCount*/		(useTaskShader ? tcu::just(tcu::UVec3(1u, 1u, 1u)) : tcu::Nothing),
5107 				/*meshCount*/		tcu::UVec3(1u, 1u, 1u),
5108 				/*width*/			1u,
5109 				/*height*/			1u));
5110 
5111 			const std::string shader	= (useTaskShader ? "task" : "mesh");
5112 			const std::string name		= "barrier_in_" + shader;
5113 			const std::string desc		= "Use a control barrier in the " + shader + " shader";
5114 
5115 			miscTests->addChild(new SimpleBarrierCase(testCtx, name, desc, std::move(paramsPtr)));
5116 		}
5117 	}
5118 
5119 	{
5120 		const struct
5121 		{
5122 			MemoryBarrierType	memBarrierType;
5123 			std::string			caseName;
5124 		} barrierTypes[] =
5125 		{
5126 			{ MemoryBarrierType::SHARED,	"memory_barrier_shared"	},
5127 			{ MemoryBarrierType::GROUP,		"group_memory_barrier"	},
5128 		};
5129 
5130 		for (const auto& barrierCase : barrierTypes)
5131 		{
5132 			for (int i = 0; i < 2; ++i)
5133 			{
5134 				const bool useTaskShader = (i == 0);
5135 
5136 				std::unique_ptr<MemoryBarrierParams> paramsPtr	(new MemoryBarrierParams(
5137 					/*taskCount*/								(useTaskShader ? tcu::just(tcu::UVec3(1u, 1u, 1u)) : tcu::Nothing),
5138 					/*meshCount*/								tcu::UVec3(1u, 1u, 1u),
5139 					/*width*/									1u,
5140 					/*height*/									1u,
5141 					/*memBarrierType*/							barrierCase.memBarrierType));
5142 
5143 				const std::string shader	= (useTaskShader ? "task" : "mesh");
5144 				const std::string name		= barrierCase.caseName + "_in_" + shader;
5145 				const std::string desc		= "Use " + paramsPtr->glslFunc() + "() in the " + shader + " shader";
5146 
5147 				miscTests->addChild(new MemoryBarrierCase(testCtx, name, desc, std::move(paramsPtr)));
5148 			}
5149 		}
5150 	}
5151 
5152 	{
5153 		for (int i = 0; i < 2; ++i)
5154 		{
5155 			const bool useTaskShader	= (i > 0);
5156 			const auto name				= std::string("custom_attributes") + (useTaskShader ? "_and_task_shader" : "");
5157 			const auto desc				= std::string("Use several custom vertex and primitive attributes") + (useTaskShader ? " and also a task shader" : "");
5158 
5159 			ParamsPtr paramsPtr (new MiscTestParams(
5160 				/*taskCount*/		(useTaskShader ? tcu::just(tcu::UVec3(1u, 1u, 1u)) : tcu::Nothing),
5161 				/*meshCount*/		tcu::UVec3(1u, 1u, 1u),
5162 				/*width*/			32u,
5163 				/*height*/			32u));
5164 
5165 			miscTests->addChild(new CustomAttributesCase(testCtx, name, desc, std::move(paramsPtr)));
5166 		}
5167 	}
5168 
5169 	{
5170 		for (int i = 0; i < 2; ++i)
5171 		{
5172 			const bool useTaskShader	= (i > 0);
5173 			const auto name				= std::string("push_constant") + (useTaskShader ? "_and_task_shader" : "");
5174 			const auto desc				= std::string("Use push constants in the mesh shader stage") + (useTaskShader ? " and also in the task shader stage" : "");
5175 
5176 			ParamsPtr paramsPtr (new MiscTestParams(
5177 				/*taskCount*/		(useTaskShader ? tcu::just(tcu::UVec3(1u, 1u, 1u)) : tcu::Nothing),
5178 				/*meshCount*/		tcu::UVec3(1u, 1u, 1u),
5179 				/*width*/			16u,
5180 				/*height*/			16u));
5181 
5182 			miscTests->addChild(new PushConstantCase(testCtx, name, desc, std::move(paramsPtr)));
5183 		}
5184 	}
5185 
5186 	{
5187 		ParamsPtr paramsPtr (new MaximizeThreadsParams(
5188 			/*taskCount*/		tcu::Nothing,
5189 			/*meshCount*/		tcu::UVec3(1u, 1u, 1u),
5190 			/*width*/			128u,
5191 			/*height*/			1u,
5192 			/*localSize*/		32u,
5193 			/*numVertices*/		128u,
5194 			/*numPrimitives*/	256u));
5195 
5196 		miscTests->addChild(new MaximizePrimitivesCase(testCtx, "maximize_primitives", "Use a large number of primitives compared to other sizes", std::move(paramsPtr)));
5197 	}
5198 
5199 	{
5200 		ParamsPtr paramsPtr (new MaximizeThreadsParams(
5201 			/*taskCount*/		tcu::Nothing,
5202 			/*meshCount*/		tcu::UVec3(1u, 1u, 1u),
5203 			/*width*/			64u,
5204 			/*height*/			1u,
5205 			/*localSize*/		32u,
5206 			/*numVertices*/		256u,
5207 			/*numPrimitives*/	128u));
5208 
5209 		miscTests->addChild(new MaximizeVerticesCase(testCtx, "maximize_vertices", "Use a large number of vertices compared to other sizes", std::move(paramsPtr)));
5210 	}
5211 
5212 	{
5213 		const uint32_t kInvocationCases[] = { 32u, 64u, 128u, 256u };
5214 
5215 		for (const auto& invocationCase : kInvocationCases)
5216 		{
5217 			const auto invsStr		= std::to_string(invocationCase);
5218 			const auto numPixels	= invocationCase / 2u;
5219 
5220 			ParamsPtr paramsPtr (new MaximizeThreadsParams(
5221 				/*taskCount*/		tcu::Nothing,
5222 				/*meshCount*/		tcu::UVec3(1u, 1u, 1u),
5223 				/*width*/			numPixels,
5224 				/*height*/			1u,
5225 				/*localSize*/		invocationCase,
5226 				/*numVertices*/		numPixels,
5227 				/*numPrimitives*/	numPixels));
5228 
5229 			miscTests->addChild(new MaximizeInvocationsCase(testCtx, "maximize_invocations_" + invsStr, "Use a large number of invocations compared to other sizes: " + invsStr, std::move(paramsPtr)));
5230 		}
5231 	}
5232 
5233 	{
5234 		for (int i = 0; i < 2; ++i)
5235 		{
5236 			const bool useDynamicTopology = (i > 0);
5237 
5238 			ParamsPtr paramsPtr (new MixedPipelinesParams(
5239 				/*taskCount*/		tcu::Nothing,
5240 				/*meshCount*/		tcu::UVec3(1u, 1u, 1u),
5241 				/*width*/			8u,
5242 				/*height*/			8u,
5243 				/*dynamicTopology*/	useDynamicTopology));
5244 
5245 			const std::string nameSuffix = (useDynamicTopology ? "_dynamic_topology" : "");
5246 			const std::string descSuffix = (useDynamicTopology ? " and use dynamic topology" : "");
5247 
5248 			miscTests->addChild(new MixedPipelinesCase(testCtx, "mixed_pipelines" + nameSuffix, "Test mixing classic and mesh pipelines in the same render pass" + descSuffix, std::move(paramsPtr)));
5249 		}
5250 	}
5251 
5252 	for (int i = 0; i < 2; ++i)
5253 	{
5254 		const bool						useTask		= (i > 0);
5255 		const tcu::Maybe<tcu::UVec3>	taskCount	= (useTask ? tcu::just(tcu::UVec3(1u, 1u, 1u)) : tcu::nothing<tcu::UVec3>());
5256 		const std::string				testName	= std::string("first_invocation_") + (useTask ? "task" : "mesh");
5257 
5258 		ParamsPtr paramsPtr (new MiscTestParams(
5259 			/*taskCount*/	taskCount,
5260 			/*meshCount*/	tcu::UVec3(1u, 1u, 1u),
5261 			/*width*/		128u,
5262 			/*height*/		1u));
5263 
5264 		miscTests->addChild(new FirstInvocationCase(testCtx, testName, "Check only the first invocation is used in EmitMeshTasksEXT() and SetMeshOutputsEXT()", std::move(paramsPtr)));
5265 	}
5266 
5267 	for (int i = 0; i < 2; ++i)
5268 	{
5269 		const bool						useTask		= (i > 0);
5270 		const tcu::Maybe<tcu::UVec3>	taskCount	= (useTask ? tcu::just(tcu::UVec3(1u, 1u, 1u)) : tcu::nothing<tcu::UVec3>());
5271 		const std::string				testName	= std::string("local_size_id_") + (useTask ? "task" : "mesh");
5272 
5273 		ParamsPtr paramsPtr (new MiscTestParams(
5274 			/*taskCount*/	taskCount,
5275 			/*meshCount*/	tcu::UVec3(1u, 1u, 1u),
5276 			/*width*/		32u,
5277 			/*height*/		1u));
5278 
5279 		miscTests->addChild(new LocalSizeIdCase(testCtx, testName, "Check LocalSizeId can be used with task and mesh shaders", std::move(paramsPtr)));
5280 	}
5281 
5282 	if (false) // Disabled. This may be illegal.
5283 	{
5284 		ParamsPtr paramsPtr (new MiscTestParams(
5285 			/*taskCount*/	tcu::UVec3(1u, 1u, 1u),
5286 			/*meshCount*/	tcu::UVec3(1u, 1u, 1u),
5287 			/*width*/		8u,
5288 			/*height*/		8u));
5289 
5290 		miscTests->addChild(new MultipleTaskPayloadsCase(testCtx, "multiple_task_payloads", "Check the task payload can be chosen among several ones", std::move(paramsPtr)));
5291 	}
5292 
5293 	{
5294 		ParamsPtr paramsPtr (new MiscTestParams(
5295 			/*taskCount*/	tcu::UVec3(1u, 1u, 1u),
5296 			/*meshCount*/	tcu::UVec3(1u, 1u, 1u),
5297 			/*width*/		8u,
5298 			/*height*/		8u));
5299 
5300 		miscTests->addChild(new PayloadReadCase(testCtx, "payload_read", "Check the task payload can be read from all task shader instances", std::move(paramsPtr)));
5301 	}
5302 
5303 	{
5304 		ParamsPtr paramsPtr (new MiscTestParams(
5305 			/*taskCount*/	tcu::UVec3(1u, 1u, 1u),
5306 			/*meshCount*/	tcu::UVec3(1u, 1u, 1u),
5307 			/*width*/		8u,
5308 			/*height*/		8u));
5309 
5310 		miscTests->addChild(new RebindSetsCase(testCtx, "rebind_sets", "Use several draw calls binding new descriptor sets and updating push constants between them", std::move(paramsPtr)));
5311 	}
5312 
5313 	return miscTests.release();
5314 }
5315 
5316 } // MeshShader
5317 } // vkt
5318