• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2021 The Khronos Group Inc.
6  * Copyright (c) 2021 Valve Corporation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Mesh Shader Misc Tests
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktMeshShaderMiscTests.hpp"
26 #include "vktMeshShaderUtil.hpp"
27 #include "vktTestCase.hpp"
28 #include "vktTestCaseUtil.hpp"
29 
30 #include "vkBuilderUtil.hpp"
31 #include "vkImageWithMemory.hpp"
32 #include "vkBufferWithMemory.hpp"
33 #include "vkObjUtil.hpp"
34 #include "vkTypeUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkImageUtil.hpp"
37 #include "vkBarrierUtil.hpp"
38 
39 #include "tcuImageCompare.hpp"
40 #include "tcuTexture.hpp"
41 #include "tcuTextureUtil.hpp"
42 #include "tcuMaybe.hpp"
43 #include "tcuStringTemplate.hpp"
44 #include "tcuTestLog.hpp"
45 
46 #include "deRandom.hpp"
47 
48 #include <cstdint>
49 #include <memory>
50 #include <utility>
51 #include <vector>
52 #include <string>
53 #include <sstream>
54 #include <map>
55 #include <limits>
56 
57 namespace vkt
58 {
59 namespace MeshShader
60 {
61 
62 namespace
63 {
64 
65 using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
66 
67 using namespace vk;
68 
69 // Output images will use this format.
getOutputFormat()70 VkFormat getOutputFormat ()
71 {
72 	return VK_FORMAT_R8G8B8A8_UNORM;
73 }
74 
// Per-channel comparison tolerance suitable for the output format above.
float getCompareThreshold ()
{
	// Chosen so that 1/256 < 0.005 < 2/256.
	constexpr float kThreshold = 0.005f;
	return kThreshold;
}
80 
81 // Check mesh shader support.
genericCheckSupport(Context & context,bool requireTaskShader,bool requireVertexStores)82 void genericCheckSupport (Context& context, bool requireTaskShader, bool requireVertexStores)
83 {
84 	checkTaskMeshShaderSupportNV(context, requireTaskShader, true);
85 
86 	if (requireVertexStores)
87 	{
88 		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
89 	}
90 }
91 
92 struct MiscTestParams
93 {
94 	tcu::Maybe<uint32_t>	taskCount;
95 	uint32_t				meshCount;
96 
97 	uint32_t				width;
98 	uint32_t				height;
99 
MiscTestParamsvkt::MeshShader::__anond9d5c6950111::MiscTestParams100 	MiscTestParams (const tcu::Maybe<uint32_t>& taskCount_, uint32_t meshCount_, uint32_t width_, uint32_t height_)
101 		: taskCount		(taskCount_)
102 		, meshCount		(meshCount_)
103 		, width			(width_)
104 		, height		(height_)
105 	{}
106 
107 	// Makes the class polymorphic and allows the right destructor to be used for subclasses.
~MiscTestParamsvkt::MeshShader::__anond9d5c6950111::MiscTestParams108 	virtual ~MiscTestParams () {}
109 
needsTaskShadervkt::MeshShader::__anond9d5c6950111::MiscTestParams110 	bool needsTaskShader () const
111 	{
112 		return static_cast<bool>(taskCount);
113 	}
114 
drawCountvkt::MeshShader::__anond9d5c6950111::MiscTestParams115 	uint32_t drawCount () const
116 	{
117 		if (needsTaskShader())
118 			return taskCount.get();
119 		return meshCount;
120 	}
121 };
122 
123 using ParamsPtr = std::unique_ptr<MiscTestParams>;
124 
125 class MeshShaderMiscCase : public vkt::TestCase
126 {
127 public:
128 					MeshShaderMiscCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params);
~MeshShaderMiscCase(void)129 	virtual			~MeshShaderMiscCase		(void) {}
130 
131 	void			checkSupport			(Context& context) const override;
132 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
133 
134 protected:
135 	std::unique_ptr<MiscTestParams> m_params;
136 };
137 
MeshShaderMiscCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)138 MeshShaderMiscCase::MeshShaderMiscCase (tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
139 	: vkt::TestCase	(testCtx, name, description)
140 	, m_params		(params.release())
141 {}
142 
checkSupport(Context & context) const143 void MeshShaderMiscCase::checkSupport (Context& context) const
144 {
145 	genericCheckSupport(context, m_params->needsTaskShader(), /*requireVertexStores*/false);
146 }
147 
148 // Adds the generic fragment shader. To be called by subclasses.
initPrograms(vk::SourceCollections & programCollection) const149 void MeshShaderMiscCase::initPrograms (vk::SourceCollections& programCollection) const
150 {
151 	std::string frag =
152 		"#version 450\n"
153 		"#extension GL_NV_mesh_shader : enable\n"
154 		"\n"
155 		"layout (location=0) in perprimitiveNV vec4 primitiveColor;\n"
156 		"layout (location=0) out vec4 outColor;\n"
157 		"\n"
158 		"void main ()\n"
159 		"{\n"
160 		"    outColor = primitiveColor;\n"
161 		"}\n"
162 		;
163 	programCollection.glslSources.add("frag") << glu::FragmentSource(frag);
164 }
165 
166 class MeshShaderMiscInstance : public vkt::TestInstance
167 {
168 public:
MeshShaderMiscInstance(Context & context,const MiscTestParams * params)169 					MeshShaderMiscInstance	(Context& context, const MiscTestParams* params)
170 						: vkt::TestInstance	(context)
171 						, m_params			(params)
172 						, m_referenceLevel	()
173 					{
174 					}
175 
176 	void			generateSolidRefLevel	(const tcu::Vec4& color, std::unique_ptr<tcu::TextureLevel>& output);
177 	virtual void	generateReferenceLevel	() = 0;
178 
179 	virtual bool	verifyResult			(const tcu::ConstPixelBufferAccess& resultAccess, const tcu::TextureLevel& referenceLevel) const;
180 	virtual bool	verifyResult			(const tcu::ConstPixelBufferAccess& resultAccess) const;
181 	tcu::TestStatus	iterate					() override;
182 
183 protected:
184 	const MiscTestParams*				m_params;
185 	std::unique_ptr<tcu::TextureLevel>	m_referenceLevel;
186 };
187 
generateSolidRefLevel(const tcu::Vec4 & color,std::unique_ptr<tcu::TextureLevel> & output)188 void MeshShaderMiscInstance::generateSolidRefLevel (const tcu::Vec4& color, std::unique_ptr<tcu::TextureLevel>& output)
189 {
190 	const auto format		= getOutputFormat();
191 	const auto tcuFormat	= mapVkFormat(format);
192 
193 	const auto iWidth		= static_cast<int>(m_params->width);
194 	const auto iHeight		= static_cast<int>(m_params->height);
195 
196 	output.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
197 
198 	const auto access		= output->getAccess();
199 
200 	// Fill with solid color.
201 	tcu::clear(access, color);
202 }
203 
verifyResult(const tcu::ConstPixelBufferAccess & resultAccess) const204 bool MeshShaderMiscInstance::verifyResult (const tcu::ConstPixelBufferAccess& resultAccess) const
205 {
206 	return verifyResult(resultAccess, *m_referenceLevel);
207 }
208 
verifyResult(const tcu::ConstPixelBufferAccess & resultAccess,const tcu::TextureLevel & referenceLevel) const209 bool MeshShaderMiscInstance::verifyResult (const tcu::ConstPixelBufferAccess& resultAccess, const tcu::TextureLevel& referenceLevel) const
210 {
211 	const auto referenceAccess = referenceLevel.getAccess();
212 
213 	const auto refWidth		= referenceAccess.getWidth();
214 	const auto refHeight	= referenceAccess.getHeight();
215 	const auto refDepth		= referenceAccess.getDepth();
216 
217 	const auto resWidth		= resultAccess.getWidth();
218 	const auto resHeight	= resultAccess.getHeight();
219 	const auto resDepth		= resultAccess.getDepth();
220 
221 	DE_ASSERT(resWidth == refWidth || resHeight == refHeight || resDepth == refDepth);
222 
223 	// For release builds.
224 	DE_UNREF(refWidth);
225 	DE_UNREF(refHeight);
226 	DE_UNREF(refDepth);
227 	DE_UNREF(resWidth);
228 	DE_UNREF(resHeight);
229 	DE_UNREF(resDepth);
230 
231 	const auto outputFormat		= getOutputFormat();
232 	const auto expectedFormat	= mapVkFormat(outputFormat);
233 	const auto resFormat		= resultAccess.getFormat();
234 	const auto refFormat		= referenceAccess.getFormat();
235 
236 	DE_ASSERT(resFormat == expectedFormat && refFormat == expectedFormat);
237 
238 	// For release builds
239 	DE_UNREF(expectedFormat);
240 	DE_UNREF(resFormat);
241 	DE_UNREF(refFormat);
242 
243 	auto&			log				= m_context.getTestContext().getLog();
244 	const auto		threshold		= getCompareThreshold();
245 	const tcu::Vec4	thresholdVec	(threshold, threshold, threshold, threshold);
246 
247 	return tcu::floatThresholdCompare(log, "Result", "", referenceAccess, resultAccess, thresholdVec, tcu::COMPARE_LOG_ON_ERROR);
248 }
249 
// Runs the test: creates a color image of the requested size, builds a graphics
// pipeline from the "mesh" and "frag" binaries (plus "task" if present), issues a
// single mesh tasks draw, copies the color image to a host-visible buffer and
// compares it with the reference level generated by the subclass.
//
// Returns a pass status on success; a mismatch is reported through TCU_FAIL.
tcu::TestStatus MeshShaderMiscInstance::iterate ()
{
	const auto&		vkd			= m_context.getDeviceInterface();
	const auto		device		= m_context.getDevice();
	auto&			alloc		= m_context.getDefaultAllocator();
	const auto		queueIndex	= m_context.getUniversalQueueFamilyIndex();
	const auto		queue		= m_context.getUniversalQueue();

	const auto		imageFormat	= getOutputFormat();
	const auto		tcuFormat	= mapVkFormat(imageFormat);
	const auto		imageExtent	= makeExtent3D(m_params->width, m_params->height, 1u);
	// The image is rendered to and later used as the source of a copy.
	const auto		imageUsage	= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

	const VkImageCreateInfo colorBufferInfo =
	{
		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
		nullptr,								//	const void*				pNext;
		0u,										//	VkImageCreateFlags		flags;
		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
		imageFormat,							//	VkFormat				format;
		imageExtent,							//	VkExtent3D				extent;
		1u,										//	uint32_t				mipLevels;
		1u,										//	uint32_t				arrayLayers;
		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
		imageUsage,								//	VkImageUsageFlags		usage;
		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
		0u,										//	uint32_t				queueFamilyIndexCount;
		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
	};

	// Create color image and view.
	ImageWithMemory	colorImage	(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
	const auto		colorSRR	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
	const auto		colorSRL	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
	const auto		colorView	= makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);

	// Create a memory buffer for verification.
	// Sized to hold one tightly packed copy of the color image.
	const auto			verificationBufferSize	= static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
	const auto			verificationBufferUsage	= (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
	const auto			verificationBufferInfo	= makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);

	BufferWithMemory	verificationBuffer		(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
	auto&				verificationBufferAlloc	= verificationBuffer.getAllocation();
	void*				verificationBufferData	= verificationBufferAlloc.getHostPtr();

	// Pipeline layout.
	// No descriptor set layouts or push constant ranges are passed here.
	const auto pipelineLayout = makePipelineLayout(vkd, device);

	// Shader modules.
	// The task shader is optional: it is only used if the test case registered one.
	const auto&	binaries	= m_context.getBinaryCollection();
	const auto	hasTask		= binaries.contains("task");

	const auto	meshShader	= createShaderModule(vkd, device, binaries.get("mesh"));
	const auto	fragShader	= createShaderModule(vkd, device, binaries.get("frag"));

	Move<VkShaderModule> taskShader;
	if (hasTask)
		taskShader = createShaderModule(vkd, device, binaries.get("task"));

	// Render pass.
	const auto renderPass = makeRenderPass(vkd, device, imageFormat);

	// Framebuffer.
	const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);

	// Viewport and scissor.
	// Both cover the whole image.
	const std::vector<VkViewport>	viewports	(1u, makeViewport(imageExtent));
	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(imageExtent));

	// Color blending.
	// Blending is enabled with ONE/ONE factors and the ADD operation for both color
	// and alpha, so fragment outputs are added to the existing attachment contents.
	const auto									colorWriteMask	= (VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT);
	const VkPipelineColorBlendAttachmentState	blendAttState	=
	{
		VK_TRUE,				//	VkBool32				blendEnable;
		VK_BLEND_FACTOR_ONE,	//	VkBlendFactor			srcColorBlendFactor;
		VK_BLEND_FACTOR_ONE,	//	VkBlendFactor			dstColorBlendFactor;
		VK_BLEND_OP_ADD,		//	VkBlendOp				colorBlendOp;
		VK_BLEND_FACTOR_ONE,	//	VkBlendFactor			srcAlphaBlendFactor;
		VK_BLEND_FACTOR_ONE,	//	VkBlendFactor			dstAlphaBlendFactor;
		VK_BLEND_OP_ADD,		//	VkBlendOp				alphaBlendOp;
		colorWriteMask,			//	VkColorComponentFlags	colorWriteMask;
	};

	const VkPipelineColorBlendStateCreateInfo colorBlendInfo =
	{
		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,	//	VkStructureType								sType;
		nullptr,													//	const void*									pNext;
		0u,															//	VkPipelineColorBlendStateCreateFlags		flags;
		VK_FALSE,													//	VkBool32									logicOpEnable;
		VK_LOGIC_OP_OR,												//	VkLogicOp									logicOp;
		1u,															//	uint32_t									attachmentCount;
		&blendAttState,												//	const VkPipelineColorBlendAttachmentState*	pAttachments;
		{ 0.0f, 0.0f, 0.0f, 0.0f },									//	float										blendConstants[4];
	};

	// taskShader.get() yields the module created above, if any.
	const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
		taskShader.get(), meshShader.get(), fragShader.get(),
		renderPass.get(), viewports, scissors, 0u/*subpass*/,
		nullptr, nullptr, nullptr, &colorBlendInfo);

	// Command pool and buffer.
	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
	const auto cmdBuffer	= cmdBufferPtr.get();

	beginCommandBuffer(vkd, cmdBuffer);

	// Run pipeline.
	// The attachment is cleared to transparent black before drawing; the draw count
	// comes from the test parameters (task count if present, mesh count otherwise).
	const tcu::Vec4 clearColor (0.0f, 0.0f, 0.0f, 0.0f);
	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
	vkd.cmdDrawMeshTasksNV(cmdBuffer, m_params->drawCount(), 0u);
	endRenderPass(vkd, cmdBuffer);

	// Copy color buffer to verification buffer.
	const auto colorAccess		= (VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT);
	const auto transferRead		= VK_ACCESS_TRANSFER_READ_BIT;
	const auto transferWrite	= VK_ACCESS_TRANSFER_WRITE_BIT;
	const auto hostRead			= VK_ACCESS_HOST_READ_BIT;

	// preCopyBarrier: color attachment writes -> transfer reads, with a layout transition.
	// postCopyBarrier: transfer writes to the buffer -> host reads.
	const auto preCopyBarrier	= makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
	const auto postCopyBarrier	= makeMemoryBarrier(transferWrite, hostRead);
	const auto copyRegion		= makeBufferImageCopy(imageExtent, colorSRL);

	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
	vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, verificationBuffer.get(), 1u, &copyRegion);
	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postCopyBarrier, 0u, nullptr, 0u, nullptr);

	endCommandBuffer(vkd, cmdBuffer);
	submitCommandsAndWait(vkd, device, queue, cmdBuffer);

	// Generate reference image and compare results.
	const tcu::IVec3					iExtent				(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
	const tcu::ConstPixelBufferAccess	verificationAccess	(tcuFormat, iExtent, verificationBufferData);

	generateReferenceLevel();
	// Invalidate the host mapping before the comparison reads the copied pixels.
	invalidateAlloc(vkd, device, verificationBufferAlloc);
	if (!verifyResult(verificationAccess))
		TCU_FAIL("Result does not match reference; check log for details");

	return tcu::TestStatus::pass("Pass");
}
394 
395 // Verify passing more complex data between the task and mesh shaders.
396 class ComplexTaskDataCase : public MeshShaderMiscCase
397 {
398 public:
ComplexTaskDataCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)399 					ComplexTaskDataCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
400 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
401 					{}
402 
403 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
404 	TestInstance*	createInstance			(Context& context) const override;
405 };
406 
407 class ComplexTaskDataInstance : public MeshShaderMiscInstance
408 {
409 public:
ComplexTaskDataInstance(Context & context,const MiscTestParams * params)410 	ComplexTaskDataInstance (Context& context, const MiscTestParams* params)
411 		: MeshShaderMiscInstance (context, params)
412 	{}
413 
414 	void	generateReferenceLevel	() override;
415 };
416 
generateReferenceLevel()417 void ComplexTaskDataInstance::generateReferenceLevel ()
418 {
419 	const auto format		= getOutputFormat();
420 	const auto tcuFormat	= mapVkFormat(format);
421 
422 	const auto iWidth		= static_cast<int>(m_params->width);
423 	const auto iHeight		= static_cast<int>(m_params->height);
424 
425 	const auto halfWidth	= iWidth / 2;
426 	const auto halfHeight	= iHeight / 2;
427 
428 	m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
429 
430 	const auto access		= m_referenceLevel->getAccess();
431 
432 	// Each image quadrant gets a different color.
433 	for (int y = 0; y < iHeight; ++y)
434 	for (int x = 0; x < iWidth; ++x)
435 	{
436 		const float	red			= ((y < halfHeight) ? 0.0f : 1.0f);
437 		const float	green		= ((x < halfWidth)  ? 0.0f : 1.0f);
438 		const auto	refColor	= tcu::Vec4(red, green, 1.0f, 1.0f);
439 		access.setPixel(refColor, x, y);
440 	}
441 }
442 
// Adds the generic fragment shader plus the "task" and "mesh" shaders.
//
// The task shader fills a nested taskNV structure with values derived from its
// work group ID and requests two mesh work groups. The mesh shader verifies every
// field and, only if they all match, emits a two-triangle quad whose position and
// color depend on the task work group (row) and mesh work group (column).
void ComplexTaskDataCase::initPrograms (vk::SourceCollections& programCollection) const
{
	// Add the generic fragment shader.
	MeshShaderMiscCase::initPrograms(programCollection);

	// Declaration shared by both shaders; ${INOUT} becomes "out" in the task shader
	// and "in" in the mesh shader.
	const std::string taskDataDeclTemplate =
		"struct RowId {\n"
		"    uint id;\n"
		"};\n"
		"\n"
		"struct WorkGroupData {\n"
		"    float WorkGroupIdPlusOnex1000Iota[10];\n"
		"    RowId rowId;\n"
		"    uvec3 WorkGroupIdPlusOnex2000Iota;\n"
		"    vec2  WorkGroupIdPlusOnex3000Iota;\n"
		"};\n"
		"\n"
		"struct ExternalData {\n"
		"    float OneMillion;\n"
		"    uint  TwoMillion;\n"
		"    WorkGroupData workGroupData;\n"
		"};\n"
		"\n"
		"${INOUT} taskNV TaskData {\n"
		"    uint yes;\n"
		"    ExternalData externalData;\n"
		"} td;\n"
		;
	const tcu::StringTemplate taskDataDecl(taskDataDeclTemplate);

	// Task shader: writes the task data and launches two mesh work groups.
	{
		std::map<std::string, std::string> taskMap;
		taskMap["INOUT"] = "out";
		std::ostringstream task;
		task
			<< "#version 450\n"
			<< "#extension GL_NV_mesh_shader : enable\n"
			<< "\n"
			<< "layout (local_size_x=1) in;\n"
			<< "\n"
			<< taskDataDecl.specialize(taskMap)
			<< "\n"
			<< "void main ()\n"
			<< "{\n"
			<< "    gl_TaskCountNV = 2u;\n"
			<< "    td.yes = 1u;\n"
			<< "    td.externalData.OneMillion = 1000000.0;\n"
			<< "    td.externalData.TwoMillion = 2000000u;\n"
			<< "    for (uint i = 0; i < 10; i++) {\n"
			<< "        td.externalData.workGroupData.WorkGroupIdPlusOnex1000Iota[i] = float((gl_WorkGroupID.x + 1u) * 1000 + i);\n"
			<< "    }\n"
			<< "    {\n"
			<< "        uint baseVal = (gl_WorkGroupID.x + 1u) * 2000;\n"
			<< "        td.externalData.workGroupData.WorkGroupIdPlusOnex2000Iota = uvec3(baseVal, baseVal + 1, baseVal + 2);\n"
			<< "    }\n"
			<< "    {\n"
			<< "        uint baseVal = (gl_WorkGroupID.x + 1u) * 3000;\n"
			<< "        td.externalData.workGroupData.WorkGroupIdPlusOnex3000Iota = vec2(baseVal, baseVal + 1);\n"
			<< "    }\n"
			<< "    td.externalData.workGroupData.rowId.id = gl_WorkGroupID.x;\n"
			<< "}\n"
			;
		programCollection.glslSources.add("task") << glu::TaskSource(task.str());
	}

	// Mesh shader: checks the received task data and draws one quadrant per work group.
	{
		std::map<std::string, std::string> meshMap;
		meshMap["INOUT"] = "in";
		std::ostringstream mesh;
		mesh
			<< "#version 450\n"
			<< "#extension GL_NV_mesh_shader : enable\n"
			<< "\n"
			<< "layout(local_size_x=2) in;\n"
			<< "layout(triangles) out;\n"
			<< "layout(max_vertices=4, max_primitives=2) out;\n"
			<< "\n"
			<< "layout (location=0) out perprimitiveNV vec4 triangleColor[];\n"
			<< "\n"
			<< taskDataDecl.specialize(meshMap)
			<< "\n"
			<< "void main ()\n"
			<< "{\n"
			<< "    bool dataOK = true;\n"
			<< "    dataOK = (dataOK && (td.yes == 1u));\n"
			<< "    dataOK = (dataOK && (td.externalData.OneMillion == 1000000.0 && td.externalData.TwoMillion == 2000000u));\n"
			<< "    uint rowId = td.externalData.workGroupData.rowId.id;\n"
			<< "    dataOK = (dataOK && (rowId == 0u || rowId == 1u));\n"
			<< "\n"
			<< "    {\n"
			<< "        uint baseVal = (rowId + 1u) * 1000u;\n"
			<< "        for (uint i = 0; i < 10; i++) {\n"
			<< "            if (td.externalData.workGroupData.WorkGroupIdPlusOnex1000Iota[i] != float(baseVal + i)) {\n"
			<< "                dataOK = false;\n"
			<< "                break;\n"
			<< "            }\n"
			<< "        }\n"
			<< "    }\n"
			<< "\n"
			<< "    {\n"
			<< "        uint baseVal = (rowId + 1u) * 2000;\n"
			<< "        uvec3 expected = uvec3(baseVal, baseVal + 1, baseVal + 2);\n"
			<< "        if (td.externalData.workGroupData.WorkGroupIdPlusOnex2000Iota != expected) {\n"
			<< "            dataOK = false;\n"
			<< "        }\n"
			<< "    }\n"
			<< "\n"
			<< "    {\n"
			<< "        uint baseVal = (rowId + 1u) * 3000;\n"
			<< "        vec2 expected = vec2(baseVal, baseVal + 1);\n"
			<< "        if (td.externalData.workGroupData.WorkGroupIdPlusOnex3000Iota != expected) {\n"
			<< "            dataOK = false;\n"
			<< "        }\n"
			<< "    }\n"
			<< "\n"
			<< "    uint columnId = gl_WorkGroupID.x;\n"
			<< "\n"
			<< "    if (dataOK) {\n"
			<< "        gl_PrimitiveCountNV = 2u;\n"
			<< "    }\n"
			<< "    else {\n"
			<< "        gl_PrimitiveCountNV = 0u;\n"
			<< "        return;\n"
			<< "    }\n"
			<< "\n"
			<< "    const vec4 outColor = vec4(rowId, columnId, 1.0f, 1.0f);\n"
			<< "    triangleColor[0] = outColor;\n"
			<< "    triangleColor[1] = outColor;\n"
			<< "\n"
			<< "    // Each local invocation will generate two points and one triangle from the quad.\n"
			<< "    // The first local invocation will generate the top quad vertices.\n"
			<< "    // The second invocation will generate the two bottom vertices.\n"
			<< "    vec4 left  = vec4(0.0, 0.0, 0.0, 1.0);\n"
			<< "    vec4 right = vec4(1.0, 0.0, 0.0, 1.0);\n"
			<< "\n"
			<< "    float localInvocationOffsetY = float(gl_LocalInvocationID.x);\n"
			<< "    left.y  += localInvocationOffsetY;\n"
			<< "    right.y += localInvocationOffsetY;\n"
			<< "\n"
			<< "    // The code above creates a quad from (0, 0) to (1, 1) but we need to offset it\n"
			<< "    // in X and/or Y depending on the row and column, to place it in other quadrants.\n"
			<< "    float quadrantOffsetX = float(int(columnId) - 1);\n"
			<< "    float quadrantOffsetY = float(int(rowId) - 1);\n"
			<< "\n"
			<< "    left.x  += quadrantOffsetX;\n"
			<< "    right.x += quadrantOffsetX;\n"
			<< "\n"
			<< "    left.y  += quadrantOffsetY;\n"
			<< "    right.y += quadrantOffsetY;\n"
			<< "\n"
			<< "    uint baseVertexId = 2*gl_LocalInvocationID.x;\n"
			<< "    gl_MeshVerticesNV[baseVertexId + 0].gl_Position = left;\n"
			<< "    gl_MeshVerticesNV[baseVertexId + 1].gl_Position = right;\n"
			<< "\n"
			<< "    uint baseIndexId = 3*gl_LocalInvocationID.x;\n"
			<< "    // 0,1,2 or 1,2,3 (note: triangles alternate front face this way)\n"
			<< "    gl_PrimitiveIndicesNV[baseIndexId + 0] = 0 + gl_LocalInvocationID.x;\n"
			<< "    gl_PrimitiveIndicesNV[baseIndexId + 1] = 1 + gl_LocalInvocationID.x;\n"
			<< "    gl_PrimitiveIndicesNV[baseIndexId + 2] = 2 + gl_LocalInvocationID.x;\n"
			<< "}\n"
			;
		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
	}
}
607 
createInstance(Context & context) const608 TestInstance* ComplexTaskDataCase::createInstance (Context& context) const
609 {
610 	return new ComplexTaskDataInstance(context, m_params.get());
611 }
612 
613 // Verify drawing a single point.
614 class SinglePointCase : public MeshShaderMiscCase
615 {
616 public:
SinglePointCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)617 					SinglePointCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
618 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
619 					{}
620 
621 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
622 	TestInstance*	createInstance			(Context& context) const override;
623 };
624 
625 class SinglePointInstance : public MeshShaderMiscInstance
626 {
627 public:
SinglePointInstance(Context & context,const MiscTestParams * params)628 	SinglePointInstance (Context& context, const MiscTestParams* params)
629 		: MeshShaderMiscInstance (context, params)
630 	{}
631 
632 	void	generateReferenceLevel	() override;
633 };
634 
createInstance(Context & context) const635 TestInstance* SinglePointCase::createInstance (Context& context) const
636 {
637 	return new SinglePointInstance (context, m_params.get());
638 }
639 
initPrograms(vk::SourceCollections & programCollection) const640 void SinglePointCase::initPrograms (vk::SourceCollections& programCollection) const
641 {
642 	DE_ASSERT(!m_params->needsTaskShader());
643 
644 	MeshShaderMiscCase::initPrograms(programCollection);
645 
646 	std::ostringstream mesh;
647 	mesh
648 		<< "#version 450\n"
649 		<< "#extension GL_NV_mesh_shader : enable\n"
650 		<< "\n"
651 		<< "layout(local_size_x=1) in;\n"
652 		<< "layout(points) out;\n"
653 		<< "layout(max_vertices=256, max_primitives=256) out;\n"
654 		<< "\n"
655 		<< "layout (location=0) out perprimitiveNV vec4 pointColor[];\n"
656 		<< "\n"
657 		<< "void main ()\n"
658 		<< "{\n"
659 		<< "    gl_PrimitiveCountNV = 1u;\n"
660 		<< "    pointColor[0] = vec4(0.0f, 1.0f, 1.0f, 1.0f);\n"
661 		<< "    gl_MeshVerticesNV[0].gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n"
662 		<< "    gl_MeshVerticesNV[0].gl_PointSize = 1.0f;\n"
663 		<< "    gl_PrimitiveIndicesNV[0] = 0;\n"
664 		<< "}\n"
665 		;
666 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
667 }
668 
generateReferenceLevel()669 void SinglePointInstance::generateReferenceLevel ()
670 {
671 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f), m_referenceLevel);
672 
673 	const auto halfWidth	= static_cast<int>(m_params->width / 2u);
674 	const auto halfHeight	= static_cast<int>(m_params->height / 2u);
675 	const auto access		= m_referenceLevel->getAccess();
676 
677 	access.setPixel(tcu::Vec4(0.0f, 1.0f, 1.0f, 1.0f), halfWidth, halfHeight);
678 }
679 
680 // Verify drawing a single line.
681 class SingleLineCase : public MeshShaderMiscCase
682 {
683 public:
SingleLineCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)684 					SingleLineCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
685 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
686 					{}
687 
688 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
689 	TestInstance*	createInstance			(Context& context) const override;
690 };
691 
692 class SingleLineInstance : public MeshShaderMiscInstance
693 {
694 public:
SingleLineInstance(Context & context,const MiscTestParams * params)695 	SingleLineInstance (Context& context, const MiscTestParams* params)
696 		: MeshShaderMiscInstance (context, params)
697 	{}
698 
699 	void	generateReferenceLevel	() override;
700 };
701 
createInstance(Context & context) const702 TestInstance* SingleLineCase::createInstance (Context& context) const
703 {
704 	return new SingleLineInstance (context, m_params.get());
705 }
706 
initPrograms(vk::SourceCollections & programCollection) const707 void SingleLineCase::initPrograms (vk::SourceCollections& programCollection) const
708 {
709 	DE_ASSERT(!m_params->needsTaskShader());
710 
711 	MeshShaderMiscCase::initPrograms(programCollection);
712 
713 	std::ostringstream mesh;
714 	mesh
715 		<< "#version 450\n"
716 		<< "#extension GL_NV_mesh_shader : enable\n"
717 		<< "\n"
718 		<< "layout(local_size_x=1) in;\n"
719 		<< "layout(lines) out;\n"
720 		<< "layout(max_vertices=256, max_primitives=256) out;\n"
721 		<< "\n"
722 		<< "layout (location=0) out perprimitiveNV vec4 lineColor[];\n"
723 		<< "\n"
724 		<< "void main ()\n"
725 		<< "{\n"
726 		<< "    gl_PrimitiveCountNV = 1u;\n"
727 		<< "    lineColor[0] = vec4(0.0f, 1.0f, 1.0f, 1.0f);\n"
728 		<< "    gl_MeshVerticesNV[0].gl_Position = vec4(-1.0f, 0.0f, 0.0f, 1.0f);\n"
729 		<< "    gl_MeshVerticesNV[1].gl_Position = vec4( 1.0f, 0.0f, 0.0f, 1.0f);\n"
730 		<< "    gl_PrimitiveIndicesNV[0] = 0;\n"
731 		<< "    gl_PrimitiveIndicesNV[1] = 1;\n"
732 		<< "}\n"
733 		;
734 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
735 }
736 
generateReferenceLevel()737 void SingleLineInstance::generateReferenceLevel ()
738 {
739 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f), m_referenceLevel);
740 
741 	const auto iWidth		= static_cast<int>(m_params->width);
742 	const auto halfHeight	= static_cast<int>(m_params->height / 2u);
743 	const auto access		= m_referenceLevel->getAccess();
744 
745 	// Center row.
746 	for (int x = 0; x < iWidth; ++x)
747 		access.setPixel(tcu::Vec4(0.0f, 1.0f, 1.0f, 1.0f), x, halfHeight);
748 }
749 
750 // Verify drawing a single triangle.
751 class SingleTriangleCase : public MeshShaderMiscCase
752 {
753 public:
SingleTriangleCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)754 					SingleTriangleCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
755 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
756 					{}
757 
758 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
759 	TestInstance*	createInstance			(Context& context) const override;
760 };
761 
762 class SingleTriangleInstance : public MeshShaderMiscInstance
763 {
764 public:
SingleTriangleInstance(Context & context,const MiscTestParams * params)765 	SingleTriangleInstance (Context& context, const MiscTestParams* params)
766 		: MeshShaderMiscInstance (context, params)
767 	{}
768 
769 	void	generateReferenceLevel	() override;
770 };
771 
createInstance(Context & context) const772 TestInstance* SingleTriangleCase::createInstance (Context& context) const
773 {
774 	return new SingleTriangleInstance (context, m_params.get());
775 }
776 
initPrograms(vk::SourceCollections & programCollection) const777 void SingleTriangleCase::initPrograms (vk::SourceCollections& programCollection) const
778 {
779 	DE_ASSERT(!m_params->needsTaskShader());
780 
781 	MeshShaderMiscCase::initPrograms(programCollection);
782 
783 	const float halfPixelX = 2.0f / static_cast<float>(m_params->width);
784 	const float halfPixelY = 2.0f / static_cast<float>(m_params->height);
785 
786 	std::ostringstream mesh;
787 	mesh
788 		<< "#version 450\n"
789 		<< "#extension GL_NV_mesh_shader : enable\n"
790 		<< "\n"
791 		<< "layout(local_size_x=1) in;\n"
792 		<< "layout(triangles) out;\n"
793 		<< "layout(max_vertices=256, max_primitives=256) out;\n"
794 		<< "\n"
795 		<< "layout (location=0) out perprimitiveNV vec4 triangleColor[];\n"
796 		<< "\n"
797 		<< "void main ()\n"
798 		<< "{\n"
799 		<< "    gl_PrimitiveCountNV = 1u;\n"
800 		<< "    triangleColor[0] = vec4(0.0f, 1.0f, 1.0f, 1.0f);\n"
801 		<< "    gl_MeshVerticesNV[0].gl_Position = vec4(" <<  halfPixelY << ", " << -halfPixelX << ", 0.0f, 1.0f);\n"
802 		<< "    gl_MeshVerticesNV[1].gl_Position = vec4(" <<  halfPixelY << ", " <<  halfPixelX << ", 0.0f, 1.0f);\n"
803 		<< "    gl_MeshVerticesNV[2].gl_Position = vec4(" << -halfPixelY << ", 0.0f, 0.0f, 1.0f);\n"
804 		<< "    gl_PrimitiveIndicesNV[0] = 0;\n"
805 		<< "    gl_PrimitiveIndicesNV[1] = 1;\n"
806 		<< "    gl_PrimitiveIndicesNV[2] = 2;\n"
807 		<< "}\n"
808 		;
809 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
810 }
811 
generateReferenceLevel()812 void SingleTriangleInstance::generateReferenceLevel ()
813 {
814 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f), m_referenceLevel);
815 
816 	const auto halfWidth	= static_cast<int>(m_params->width / 2u);
817 	const auto halfHeight	= static_cast<int>(m_params->height / 2u);
818 	const auto access		= m_referenceLevel->getAccess();
819 
820 	// Single pixel in the center.
821 	access.setPixel(tcu::Vec4(0.0f, 1.0f, 1.0f, 1.0f), halfWidth, halfHeight);
822 }
823 
824 // Verify drawing the maximum number of points.
825 class MaxPointsCase : public MeshShaderMiscCase
826 {
827 public:
MaxPointsCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)828 					MaxPointsCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
829 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
830 					{}
831 
832 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
833 	TestInstance*	createInstance			(Context& context) const override;
834 };
835 
836 class MaxPointsInstance : public MeshShaderMiscInstance
837 {
838 public:
MaxPointsInstance(Context & context,const MiscTestParams * params)839 	MaxPointsInstance (Context& context, const MiscTestParams* params)
840 		: MeshShaderMiscInstance (context, params)
841 	{}
842 
843 	void	generateReferenceLevel	() override;
844 };
845 
createInstance(Context & context) const846 TestInstance* MaxPointsCase::createInstance (Context& context) const
847 {
848 	return new MaxPointsInstance (context, m_params.get());
849 }
850 
initPrograms(vk::SourceCollections & programCollection) const851 void MaxPointsCase::initPrograms (vk::SourceCollections& programCollection) const
852 {
853 	DE_ASSERT(!m_params->needsTaskShader());
854 
855 	MeshShaderMiscCase::initPrograms(programCollection);
856 
857 	// Fill a 16x16 image with 256 points. Each of the 32 local invocations will handle a segment of 8 pixels. Two segments per row.
858 	DE_ASSERT(m_params->width == 16u && m_params->height == 16u);
859 
860 	std::ostringstream mesh;
861 	mesh
862 		<< "#version 450\n"
863 		<< "#extension GL_NV_mesh_shader : enable\n"
864 		<< "\n"
865 		<< "layout(local_size_x=32) in;\n"
866 		<< "layout(points) out;\n"
867 		<< "layout(max_vertices=256, max_primitives=256) out;\n"
868 		<< "\n"
869 		<< "layout (location=0) out perprimitiveNV vec4 pointColor[];\n"
870 		<< "\n"
871 		<< "void main ()\n"
872 		<< "{\n"
873 		<< "    gl_PrimitiveCountNV = 256u;\n"
874 		<< "    uint firstPixel = 8u * gl_LocalInvocationID.x;\n"
875 		<< "    uint row = firstPixel / 16u;\n"
876 		<< "    uint col = firstPixel % 16u;\n"
877 		<< "    float pixSize = 2.0f / 16.0f;\n"
878 		<< "    float yCoord = pixSize * (float(row) + 0.5f) - 1.0f;\n"
879 		<< "    float baseXCoord = pixSize * (float(col) + 0.5f) - 1.0f;\n"
880 		<< "    for (uint i = 0; i < 8u; i++) {\n"
881 		<< "        float xCoord = baseXCoord + pixSize * float(i);\n"
882 		<< "        uint pixId = firstPixel + i;\n"
883 		<< "        gl_MeshVerticesNV[pixId].gl_Position = vec4(xCoord, yCoord, 0.0f, 1.0f);\n"
884 		<< "        gl_MeshVerticesNV[pixId].gl_PointSize = 1.0f;\n"
885 		<< "        gl_PrimitiveIndicesNV[pixId] = pixId;\n"
886 		<< "        pointColor[pixId] = vec4(((xCoord + 1.0f) / 2.0f), ((yCoord + 1.0f) / 2.0f), 0.0f, 1.0f);\n"
887 		<< "    }\n"
888 		<< "}\n"
889 		;
890 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
891 }
892 
generateReferenceLevel()893 void MaxPointsInstance::generateReferenceLevel ()
894 {
895 	const auto format		= getOutputFormat();
896 	const auto tcuFormat	= mapVkFormat(format);
897 
898 	const auto iWidth		= static_cast<int>(m_params->width);
899 	const auto iHeight		= static_cast<int>(m_params->height);
900 	const auto fWidth		= static_cast<float>(m_params->width);
901 	const auto fHeight		= static_cast<float>(m_params->height);
902 
903 	m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
904 
905 	const auto access		= m_referenceLevel->getAccess();
906 
907 	// Fill with gradient like the shader does.
908 	for (int y = 0; y < iHeight; ++y)
909 	for (int x = 0; x < iWidth; ++x)
910 	{
911 		const tcu::Vec4 color (
912 			((static_cast<float>(x) + 0.5f) / fWidth),
913 			((static_cast<float>(y) + 0.5f) / fHeight),
914 			0.0f, 1.0f);
915 		access.setPixel(color, x, y);
916 	}
917 }
918 
919 // Verify drawing the maximum number of lines.
920 class MaxLinesCase : public MeshShaderMiscCase
921 {
922 public:
MaxLinesCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)923 					MaxLinesCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
924 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
925 					{}
926 
927 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
928 	TestInstance*	createInstance			(Context& context) const override;
929 };
930 
931 class MaxLinesInstance : public MeshShaderMiscInstance
932 {
933 public:
MaxLinesInstance(Context & context,const MiscTestParams * params)934 	MaxLinesInstance (Context& context, const MiscTestParams* params)
935 		: MeshShaderMiscInstance (context, params)
936 	{}
937 
938 	void	generateReferenceLevel	() override;
939 };
940 
createInstance(Context & context) const941 TestInstance* MaxLinesCase::createInstance (Context& context) const
942 {
943 	return new MaxLinesInstance (context, m_params.get());
944 }
945 
initPrograms(vk::SourceCollections & programCollection) const946 void MaxLinesCase::initPrograms (vk::SourceCollections& programCollection) const
947 {
948 	DE_ASSERT(!m_params->needsTaskShader());
949 
950 	MeshShaderMiscCase::initPrograms(programCollection);
951 
952 	// Fill a 1x1020 image with 255 lines, each line being 4 pixels tall. Each invocation will generate ~8 lines.
953 	DE_ASSERT(m_params->width == 1u && m_params->height == 1020u);
954 
955 	std::ostringstream mesh;
956 	mesh
957 		<< "#version 450\n"
958 		<< "#extension GL_NV_mesh_shader : enable\n"
959 		<< "\n"
960 		<< "layout(local_size_x=32) in;\n"
961 		<< "layout(lines) out;\n"
962 		<< "layout(max_vertices=256, max_primitives=255) out;\n"
963 		<< "\n"
964 		<< "layout (location=0) out perprimitiveNV vec4 lineColor[];\n"
965 		<< "\n"
966 		<< "void main ()\n"
967 		<< "{\n"
968 		<< "    gl_PrimitiveCountNV = 255u;\n"
969 		<< "    uint firstLine = 8u * gl_LocalInvocationID.x;\n"
970 		<< "    for (uint i = 0u; i < 8u; i++) {\n"
971 		<< "        uint lineId = firstLine + i;\n"
972 		<< "        uint topPixel = 4u * lineId;\n"
973 		<< "        uint bottomPixel = 3u + topPixel;\n"
974 		<< "        if (bottomPixel < 1020u) {\n"
975 		<< "            float bottomCoord = ((float(bottomPixel) + 1.0f) / 1020.0) * 2.0 - 1.0;\n"
976 		<< "            gl_MeshVerticesNV[lineId + 1u].gl_Position = vec4(0.0, bottomCoord, 0.0f, 1.0f);\n"
977 		<< "            gl_PrimitiveIndicesNV[lineId * 2u] = lineId;\n"
978 		<< "            gl_PrimitiveIndicesNV[lineId * 2u + 1u] = lineId + 1u;\n"
979 		<< "            lineColor[lineId] = vec4(0.0f, 1.0f, float(lineId) / 255.0f, 1.0f);\n"
980 		<< "        } else {\n"
981 		<< "            // The last iteration of the last invocation emits the first point\n"
982 		<< "            gl_MeshVerticesNV[0].gl_Position = vec4(0.0, -1.0, 0.0f, 1.0f);\n"
983 		<< "        }\n"
984 		<< "    }\n"
985 		<< "}\n"
986 		;
987 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
988 }
989 
generateReferenceLevel()990 void MaxLinesInstance::generateReferenceLevel ()
991 {
992 	const auto format		= getOutputFormat();
993 	const auto tcuFormat	= mapVkFormat(format);
994 
995 	const auto iWidth		= static_cast<int>(m_params->width);
996 	const auto iHeight		= static_cast<int>(m_params->height);
997 
998 	m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
999 
1000 	const auto access		= m_referenceLevel->getAccess();
1001 
1002 	// Fill lines, 4 pixels per line.
1003 	const uint32_t kNumLines = 255u;
1004 	const uint32_t kLineHeight = 4u;
1005 
1006 	for (uint32_t i = 0u; i < kNumLines; ++i)
1007 	{
1008 		const tcu::Vec4 color (0.0f, 1.0f, static_cast<float>(i) / static_cast<float>(kNumLines), 1.0f);
1009 		for (uint32_t j = 0u; j < kLineHeight; ++j)
1010 			access.setPixel(color, 0, i*kLineHeight + j);
1011 	}
1012 }
1013 
1014 // Verify drawing the maximum number of triangles.
1015 class MaxTrianglesCase : public MeshShaderMiscCase
1016 {
1017 public:
MaxTrianglesCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)1018 					MaxTrianglesCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
1019 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
1020 					{}
1021 
1022 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
1023 	TestInstance*	createInstance			(Context& context) const override;
1024 };
1025 
1026 class MaxTrianglesInstance : public MeshShaderMiscInstance
1027 {
1028 public:
MaxTrianglesInstance(Context & context,const MiscTestParams * params)1029 	MaxTrianglesInstance (Context& context, const MiscTestParams* params)
1030 		: MeshShaderMiscInstance (context, params)
1031 	{}
1032 
1033 	void	generateReferenceLevel	() override;
1034 };
1035 
createInstance(Context & context) const1036 TestInstance* MaxTrianglesCase::createInstance (Context& context) const
1037 {
1038 	return new MaxTrianglesInstance (context, m_params.get());
1039 }
1040 
initPrograms(vk::SourceCollections & programCollection) const1041 void MaxTrianglesCase::initPrograms (vk::SourceCollections& programCollection) const
1042 {
1043 	DE_ASSERT(!m_params->needsTaskShader());
1044 
1045 	MeshShaderMiscCase::initPrograms(programCollection);
1046 
1047 	// Fill a sufficiently large image with solid color. Generate a quarter of a circle with the center in the top left corner,
1048 	// using a triangle fan that advances from top to bottom. Each invocation will generate ~8 triangles.
1049 	std::ostringstream mesh;
1050 	mesh
1051 		<< "#version 450\n"
1052 		<< "#extension GL_NV_mesh_shader : enable\n"
1053 		<< "\n"
1054 		<< "layout(local_size_x=32) in;\n"
1055 		<< "layout(triangles) out;\n"
1056 		<< "layout(max_vertices=256, max_primitives=254) out;\n"
1057 		<< "\n"
1058 		<< "layout (location=0) out perprimitiveNV vec4 triangleColor[];\n"
1059 		<< "\n"
1060 		<< "const float PI_2 = 1.57079632679489661923;\n"
1061 		<< "const float RADIUS = 4.5;\n"
1062 		<< "\n"
1063 		<< "void main ()\n"
1064 		<< "{\n"
1065 		<< "    gl_PrimitiveCountNV = 254u;\n"
1066 		<< "    uint firstTriangle = 8u * gl_LocalInvocationID.x;\n"
1067 		<< "    for (uint i = 0u; i < 8u; i++) {\n"
1068 		<< "        uint triangleId = firstTriangle + i;\n"
1069 		<< "        if (triangleId < 254u) {\n"
1070 		<< "            uint vertexId = triangleId + 2u;\n"
1071 		<< "            float angleProportion = float(vertexId - 1u) / 254.0f;\n"
1072 		<< "            float angle = PI_2 * angleProportion;\n"
1073 		<< "            float xCoord = cos(angle) * RADIUS - 1.0;\n"
1074 		<< "            float yCoord = sin(angle) * RADIUS - 1.0;\n"
1075 		<< "            gl_MeshVerticesNV[vertexId].gl_Position = vec4(xCoord, yCoord, 0.0, 1.0);\n"
1076 		<< "            gl_PrimitiveIndicesNV[triangleId * 3u + 0u] = 0u;\n"
1077 		<< "            gl_PrimitiveIndicesNV[triangleId * 3u + 1u] = triangleId + 1u;\n"
1078 		<< "            gl_PrimitiveIndicesNV[triangleId * 3u + 2u] = triangleId + 2u;\n"
1079 		<< "            triangleColor[triangleId] = vec4(0.0f, 0.0f, 1.0f, 1.0f);\n"
1080 		<< "        } else {\n"
1081 		<< "            // The last iterations of the last invocation emit the first two vertices\n"
1082 		<< "            uint vertexId = triangleId - 254u;\n"
1083 		<< "            if (vertexId == 0u) {\n"
1084 		<< "                gl_MeshVerticesNV[0u].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
1085 		<< "            } else {\n"
1086 		<< "                gl_MeshVerticesNV[1u].gl_Position = vec4(RADIUS, -1.0, 0.0, 1.0);\n"
1087 		<< "            }\n"
1088 		<< "        }\n"
1089 		<< "    }\n"
1090 		<< "}\n"
1091 		;
1092 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
1093 }
1094 
generateReferenceLevel()1095 void MaxTrianglesInstance::generateReferenceLevel ()
1096 {
1097 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
1098 }
1099 
1100 // Large work groups with many threads.
1101 class LargeWorkGroupCase : public MeshShaderMiscCase
1102 {
1103 public:
LargeWorkGroupCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)1104 					LargeWorkGroupCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
1105 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
1106 					{}
1107 
1108 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
1109 	TestInstance*	createInstance			(Context& context) const override;
1110 
1111 	static constexpr uint32_t kLocalInvocations = 32u;
1112 };
1113 
1114 class LargeWorkGroupInstance : public MeshShaderMiscInstance
1115 {
1116 public:
LargeWorkGroupInstance(Context & context,const MiscTestParams * params)1117 	LargeWorkGroupInstance (Context& context, const MiscTestParams* params)
1118 		: MeshShaderMiscInstance (context, params)
1119 	{}
1120 
1121 	void	generateReferenceLevel	() override;
1122 };
1123 
createInstance(Context & context) const1124 TestInstance* LargeWorkGroupCase::createInstance (Context& context) const
1125 {
1126 	return new LargeWorkGroupInstance(context, m_params.get());
1127 }
1128 
generateReferenceLevel()1129 void LargeWorkGroupInstance::generateReferenceLevel ()
1130 {
1131 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
1132 }
1133 
initPrograms(vk::SourceCollections & programCollection) const1134 void LargeWorkGroupCase::initPrograms (vk::SourceCollections& programCollection) const
1135 {
1136 	const auto useTaskShader	= m_params->needsTaskShader();
1137 	const auto taskMultiplier	= (useTaskShader ? m_params->taskCount.get() : 1u);
1138 
1139 	// Add the frag shader.
1140 	MeshShaderMiscCase::initPrograms(programCollection);
1141 
1142 	std::ostringstream taskData;
1143 	taskData
1144 		<< "taskNV TaskData {\n"
1145 		<< "    uint parentTask[" << kLocalInvocations << "];\n"
1146 		<< "} td;\n"
1147 		;
1148 	const auto taskDataStr = taskData.str();
1149 
1150 	if (useTaskShader)
1151 	{
1152 		std::ostringstream task;
1153 		task
1154 			<< "#version 450\n"
1155 			<< "#extension GL_NV_mesh_shader : enable\n"
1156 			<< "\n"
1157 			<< "layout (local_size_x=" << kLocalInvocations << ") in;\n"
1158 			<< "\n"
1159 			<< "out " << taskDataStr
1160 			<< "\n"
1161 			<< "void main () {\n"
1162 			<< "    gl_TaskCountNV = " << m_params->meshCount << ";\n"
1163 			<< "    td.parentTask[gl_LocalInvocationID.x] = gl_WorkGroupID.x;\n"
1164 			<< "}\n"
1165 			;
1166 		programCollection.glslSources.add("task") << glu::TaskSource(task.str());
1167 	}
1168 
1169 	// Needed for the code below to work.
1170 	DE_ASSERT(m_params->width * m_params->height == taskMultiplier * m_params->meshCount * kLocalInvocations);
1171 	DE_UNREF(taskMultiplier); // For release builds.
1172 
1173 	// Emit one point per framebuffer pixel. The number of jobs (kLocalInvocations in each mesh shader work group, multiplied by the
1174 	// number of mesh work groups emitted by each task work group) must be the same as the total framebuffer size. Calculate a job
1175 	// ID corresponding to the current mesh shader invocation, and assign a pixel position to it. Draw a point at that position.
1176 	std::ostringstream mesh;
1177 	mesh
1178 		<< "#version 450\n"
1179 		<< "#extension GL_NV_mesh_shader : enable\n"
1180 		<< "\n"
1181 		<< "layout (local_size_x=" << kLocalInvocations << ") in;\n"
1182 		<< "layout (points) out;\n"
1183 		<< "layout (max_vertices=" << kLocalInvocations << ", max_primitives=" << kLocalInvocations << ") out;\n"
1184 		<< "\n"
1185 		<< (useTaskShader ? "in " + taskDataStr : "")
1186 		<< "\n"
1187 		<< "layout (location=0) out perprimitiveNV vec4 pointColor[];\n"
1188 		<< "\n"
1189 		<< "void main () {\n"
1190 		;
1191 
1192 	if (useTaskShader)
1193 	{
1194 		mesh
1195 			<< "    uint parentTask = td.parentTask[0];\n"
1196 			<< "    if (td.parentTask[gl_LocalInvocationID.x] != parentTask) {\n"
1197 			<< "        return;\n"
1198 			<< "    }\n"
1199 			;
1200 	}
1201 	else
1202 	{
1203 		mesh << "    uint parentTask = 0;\n";
1204 	}
1205 
1206 	mesh
1207 		<< "    gl_PrimitiveCountNV = " << kLocalInvocations << ";\n"
1208 		<< "    uint jobId = ((parentTask * " << m_params->meshCount << ") + gl_WorkGroupID.x) * " << kLocalInvocations << " + gl_LocalInvocationID.x;\n"
1209 		<< "    uint row = jobId / " << m_params->width << ";\n"
1210 		<< "    uint col = jobId % " << m_params->width << ";\n"
1211 		<< "    float yCoord = (float(row + 0.5) / " << m_params->height << ".0) * 2.0 - 1.0;\n"
1212 		<< "    float xCoord = (float(col + 0.5) / " << m_params->width << ".0) * 2.0 - 1.0;\n"
1213 		<< "    gl_MeshVerticesNV[gl_LocalInvocationID.x].gl_Position = vec4(xCoord, yCoord, 0.0, 1.0);\n"
1214 		<< "    gl_MeshVerticesNV[gl_LocalInvocationID.x].gl_PointSize = 1.0;\n"
1215 		<< "    gl_PrimitiveIndicesNV[gl_LocalInvocationID.x] = gl_LocalInvocationID.x;\n"
1216 		<< "    pointColor[gl_LocalInvocationID.x] = vec4(0.0, 0.0, 1.0, 1.0);\n"
1217 		<< "}\n"
1218 		;
1219 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
1220 }
1221 
// Primitive topologies used by the tests that generate no primitives of a given type.
enum class PrimitiveType
{
	POINTS = 0,
	LINES,
	TRIANGLES,
};
1224 
primitiveTypeName(PrimitiveType primitiveType)1225 std::string primitiveTypeName (PrimitiveType primitiveType)
1226 {
1227 	std::string primitiveName;
1228 
1229 	switch (primitiveType)
1230 	{
1231 	case PrimitiveType::POINTS:		primitiveName = "points";		break;
1232 	case PrimitiveType::LINES:		primitiveName = "lines";		break;
1233 	case PrimitiveType::TRIANGLES:	primitiveName = "triangles";	break;
1234 	default: DE_ASSERT(false); break;
1235 	}
1236 
1237 	return primitiveName;
1238 }
1239 
1240 struct NoPrimitivesParams : public MiscTestParams
1241 {
NoPrimitivesParamsvkt::MeshShader::__anond9d5c6950111::NoPrimitivesParams1242 	NoPrimitivesParams (const tcu::Maybe<uint32_t>& taskCount_, uint32_t meshCount_, uint32_t width_, uint32_t height_, PrimitiveType primitiveType_)
1243 		: MiscTestParams	(taskCount_, meshCount_, width_, height_)
1244 		, primitiveType		(primitiveType_)
1245 		{}
1246 
1247 	PrimitiveType primitiveType;
1248 };
1249 
1250 class NoPrimitivesCase : public MeshShaderMiscCase
1251 {
1252 public:
NoPrimitivesCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)1253 					NoPrimitivesCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
1254 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
1255 					{}
1256 
1257 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
1258 	TestInstance*	createInstance			(Context& context) const override;
1259 };
1260 
1261 class NoPrimitivesInstance : public MeshShaderMiscInstance
1262 {
1263 public:
NoPrimitivesInstance(Context & context,const MiscTestParams * params)1264 	NoPrimitivesInstance (Context& context, const MiscTestParams* params)
1265 		: MeshShaderMiscInstance (context, params)
1266 	{}
1267 
1268 	void	generateReferenceLevel	() override;
1269 };
1270 
generateReferenceLevel()1271 void NoPrimitivesInstance::generateReferenceLevel ()
1272 {
1273 	// No primitives: clear color.
1274 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f), m_referenceLevel);
1275 }
1276 
createInstance(Context & context) const1277 TestInstance* NoPrimitivesCase::createInstance (Context& context) const
1278 {
1279 	return new NoPrimitivesInstance(context, m_params.get());
1280 }
1281 
initPrograms(vk::SourceCollections & programCollection) const1282 void NoPrimitivesCase::initPrograms (vk::SourceCollections& programCollection) const
1283 {
1284 	const auto params = dynamic_cast<NoPrimitivesParams*>(m_params.get());
1285 
1286 	DE_ASSERT(params);
1287 	DE_ASSERT(!params->needsTaskShader());
1288 
1289 	const auto primitiveName = primitiveTypeName(params->primitiveType);
1290 
1291 	std::ostringstream mesh;
1292 	mesh
1293 		<< "#version 450\n"
1294 		<< "#extension GL_NV_mesh_shader : enable\n"
1295 		<< "\n"
1296 		<< "layout (local_size_x=32) in;\n"
1297 		<< "layout (" << primitiveName << ") out;\n"
1298 		<< "layout (max_vertices=256, max_primitives=256) out;\n"
1299 		<< "\n"
1300 		<< "layout (location=0) out perprimitiveNV vec4 primitiveColor[];\n"
1301 		<< "\n"
1302 		<< "void main () {\n"
1303 		<< "    gl_PrimitiveCountNV = 0u;\n"
1304 		<< "}\n"
1305 		;
1306 
1307 	MeshShaderMiscCase::initPrograms(programCollection);
1308 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
1309 }
1310 
1311 class NoPrimitivesExtraWritesCase : public NoPrimitivesCase
1312 {
1313 public:
NoPrimitivesExtraWritesCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)1314 					NoPrimitivesExtraWritesCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
1315 						: NoPrimitivesCase (testCtx, name, description, std::move(params))
1316 					{}
1317 
1318 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
1319 
1320 	static constexpr uint32_t kLocalInvocations = 32u;
1321 };
1322 
initPrograms(vk::SourceCollections & programCollection) const1323 void NoPrimitivesExtraWritesCase::initPrograms (vk::SourceCollections& programCollection) const
1324 {
1325 	const auto params = dynamic_cast<NoPrimitivesParams*>(m_params.get());
1326 
1327 	DE_ASSERT(params);
1328 	DE_ASSERT(m_params->needsTaskShader());
1329 
1330 	std::ostringstream taskData;
1331 	taskData
1332 		<< "taskNV TaskData {\n"
1333 		<< "    uint localInvocations[" << kLocalInvocations << "];\n"
1334 		<< "} td;\n"
1335 		;
1336 	const auto taskDataStr = taskData.str();
1337 
1338 	std::ostringstream task;
1339 	task
1340 		<< "#version 450\n"
1341 		<< "#extension GL_NV_mesh_shader : enable\n"
1342 		<< "\n"
1343 		<< "layout (local_size_x=" << kLocalInvocations << ") in;\n"
1344 		<< "\n"
1345 		<< "out " << taskDataStr
1346 		<< "\n"
1347 		<< "void main () {\n"
1348 		<< "    gl_TaskCountNV = " << params->meshCount << ";\n"
1349 		<< "    td.localInvocations[gl_LocalInvocationID.x] = gl_LocalInvocationID.x;\n"
1350 		<< "}\n"
1351 		;
1352 	programCollection.glslSources.add("task") << glu::TaskSource(task.str());
1353 
1354 	const auto primitiveName = primitiveTypeName(params->primitiveType);
1355 
1356 	// Otherwise the shader would be illegal.
1357 	DE_ASSERT(kLocalInvocations > 2u);
1358 
1359 	uint32_t maxPrimitives = 0u;
1360 	switch (params->primitiveType)
1361 	{
1362 	case PrimitiveType::POINTS:		maxPrimitives = kLocalInvocations - 0u;	break;
1363 	case PrimitiveType::LINES:		maxPrimitives = kLocalInvocations - 1u;	break;
1364 	case PrimitiveType::TRIANGLES:	maxPrimitives = kLocalInvocations - 2u;	break;
1365 	default: DE_ASSERT(false); break;
1366 	}
1367 
1368 	const std::string pointSizeDecl	= ((params->primitiveType == PrimitiveType::POINTS)
1369 									? "        gl_MeshVerticesNV[gl_LocalInvocationID.x].gl_PointSize = 1.0;\n"
1370 									: "");
1371 
1372 	std::ostringstream mesh;
1373 	mesh
1374 		<< "#version 450\n"
1375 		<< "#extension GL_NV_mesh_shader : enable\n"
1376 		<< "\n"
1377 		<< "layout (local_size_x=" << kLocalInvocations << ") in;\n"
1378 		<< "layout (" << primitiveName << ") out;\n"
1379 		<< "layout (max_vertices=" << kLocalInvocations << ", max_primitives=" << maxPrimitives << ") out;\n"
1380 		<< "\n"
1381 		<< "in " << taskDataStr
1382 		<< "\n"
1383 		<< "layout (location=0) out perprimitiveNV vec4 primitiveColor[];\n"
1384 		<< "\n"
1385 		<< "shared uint sumOfIds;\n"
1386 		<< "\n"
1387 		<< "const float PI_2 = 1.57079632679489661923;\n"
1388 		<< "const float RADIUS = 1.0f;\n"
1389 		<< "\n"
1390 		<< "void main ()\n"
1391 		<< "{\n"
1392 		<< "    sumOfIds = 0u;\n"
1393 		<< "    memoryBarrierShared();\n"
1394 		<< "    barrier();\n"
1395 		<< "    atomicAdd(sumOfIds, td.localInvocations[gl_LocalInvocationID.x]);\n"
1396 		<< "    memoryBarrierShared();\n"
1397 		<< "    barrier();\n"
1398 		<< "    // This should dynamically give 0\n"
1399 		<< "    gl_PrimitiveCountNV = sumOfIds - (" << kLocalInvocations * (kLocalInvocations - 1u) / 2u << ");\n"
1400 		<< "\n"
1401 		<< "    // Emit points and primitives to the arrays in any case\n"
1402 		<< "    if (gl_LocalInvocationID.x > 0u) {\n"
1403 		<< "        float proportion = (float(gl_LocalInvocationID.x - 1u) + 0.5f) / float(" << kLocalInvocations << " - 1u);\n"
1404 		<< "        float angle = PI_2 * proportion;\n"
1405 		<< "        float xCoord = cos(angle) * RADIUS - 1.0;\n"
1406 		<< "        float yCoord = sin(angle) * RADIUS - 1.0;\n"
1407 		<< "        gl_MeshVerticesNV[gl_LocalInvocationID.x].gl_Position = vec4(xCoord, yCoord, 0.0, 1.0);\n"
1408 		<< pointSizeDecl
1409 		<< "    } else {\n"
1410 		<< "        gl_MeshVerticesNV[gl_LocalInvocationID.x].gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
1411 		<< pointSizeDecl
1412 		<< "    }\n"
1413 		<< "    uint primitiveId = max(gl_LocalInvocationID.x, " << (maxPrimitives - 1u) << ");\n"
1414 		<< "    primitiveColor[primitiveId] = vec4(0.0, 0.0, 1.0, 1.0);\n"
1415 		;
1416 
1417 	if (params->primitiveType == PrimitiveType::POINTS)
1418 	{
1419 		mesh
1420 			<< "    gl_PrimitiveIndicesNV[primitiveId] = primitiveId;\n"
1421 			;
1422 	}
1423 	else if (params->primitiveType == PrimitiveType::LINES)
1424 	{
1425 		mesh
1426 			<< "    gl_PrimitiveIndicesNV[primitiveId * 2u + 0u] = primitiveId + 0u;\n"
1427 			<< "    gl_PrimitiveIndicesNV[primitiveId * 2u + 1u] = primitiveId + 1u;\n"
1428 			;
1429 	}
1430 	else if (params->primitiveType == PrimitiveType::TRIANGLES)
1431 	{
1432 		mesh
1433 			<< "    gl_PrimitiveIndicesNV[primitiveId * 3u + 0u] = 0u;\n"
1434 			<< "    gl_PrimitiveIndicesNV[primitiveId * 3u + 1u] = primitiveId + 1u;\n"
1435 			<< "    gl_PrimitiveIndicesNV[primitiveId * 3u + 2u] = primitiveId + 3u;\n"
1436 			;
1437 	}
1438 	else
1439 		DE_ASSERT(false);
1440 
1441 	mesh
1442 		<< "}\n"
1443 		;
1444 
1445 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
1446 
1447 	MeshShaderMiscCase::initPrograms(programCollection);
1448 }
1449 
1450 // Case testing barrier().
1451 class SimpleBarrierCase : public MeshShaderMiscCase
1452 {
1453 public:
SimpleBarrierCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)1454 					SimpleBarrierCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
1455 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
1456 					{}
1457 
1458 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
1459 	TestInstance*	createInstance			(Context& context) const override;
1460 
1461 	static constexpr uint32_t kLocalInvocations = 32u;
1462 };
1463 
1464 class SimpleBarrierInstance : public MeshShaderMiscInstance
1465 {
1466 public:
SimpleBarrierInstance(Context & context,const MiscTestParams * params)1467 	SimpleBarrierInstance (Context& context, const MiscTestParams* params)
1468 		: MeshShaderMiscInstance (context, params)
1469 	{}
1470 
1471 	void	generateReferenceLevel	() override;
1472 };
1473 
createInstance(Context & context) const1474 TestInstance* SimpleBarrierCase::createInstance (Context& context) const
1475 {
1476 	return new SimpleBarrierInstance(context, m_params.get());
1477 }
1478 
generateReferenceLevel()1479 void SimpleBarrierInstance::generateReferenceLevel ()
1480 {
1481 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
1482 }
1483 
initPrograms(vk::SourceCollections & programCollection) const1484 void SimpleBarrierCase::initPrograms (vk::SourceCollections& programCollection) const
1485 {
1486 	// Generate frag shader.
1487 	MeshShaderMiscCase::initPrograms(programCollection);
1488 
1489 	DE_ASSERT(m_params->meshCount == 1u);
1490 	DE_ASSERT(m_params->width == 1u && m_params->height == 1u);
1491 
1492 	std::ostringstream meshPrimData;
1493 	meshPrimData
1494 			<< "gl_PrimitiveCountNV = 1u;\n"
1495 			<< "gl_MeshVerticesNV[0].gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
1496 			<< "gl_MeshVerticesNV[0].gl_PointSize = 1.0;\n"
1497 			<< "primitiveColor[0] = vec4(0.0, 0.0, 1.0, 1.0);\n"
1498 			<< "gl_PrimitiveIndicesNV[0] = 0;\n"
1499 			;
1500 	const std::string meshPrimStr	= meshPrimData.str();
1501 
1502 	const std::string taskOK		= "gl_TaskCountNV = 1u;\n";
1503 	const std::string taskFAIL		= "gl_TaskCountNV = 0u;\n";
1504 
1505 	const std::string meshOK		= meshPrimStr;
1506 	const std::string meshFAIL		= "gl_PrimitiveCountNV = 0u;\n";
1507 
1508 	const std::string okStatement	= (m_params->needsTaskShader() ? taskOK : meshOK);
1509 	const std::string failStatement	= (m_params->needsTaskShader() ? taskFAIL : meshFAIL);
1510 
1511 	const std::string	sharedDecl = "shared uint counter;\n\n";
1512 	std::ostringstream	verification;
1513 	verification
1514 		<< "counter = 0;\n"
1515 		<< "memoryBarrierShared();\n"
1516 		<< "barrier();\n"
1517 		<< "atomicAdd(counter, 1u);\n"
1518 		<< "memoryBarrierShared();\n"
1519 		<< "barrier();\n"
1520 		<< "if (gl_LocalInvocationID.x == 0u) {\n"
1521 		<< "    if (counter == " << kLocalInvocations << ") {\n"
1522 		<< "\n"
1523 		<< okStatement
1524 		<< "\n"
1525 		<< "    } else {\n"
1526 		<< "\n"
1527 		<< failStatement
1528 		<< "\n"
1529 		<< "    }\n"
1530 		<< "}\n"
1531 		;
1532 
1533 	// The mesh shader is very similar in both cases, so we use a template.
1534 	std::ostringstream meshTemplateStr;
1535 	meshTemplateStr
1536 		<< "#version 450\n"
1537 		<< "#extension GL_NV_mesh_shader : enable\n"
1538 		<< "\n"
1539 		<< "layout (local_size_x=${LOCAL_SIZE}) in;\n"
1540 		<< "layout (points) out;\n"
1541 		<< "layout (max_vertices=1, max_primitives=1) out;\n"
1542 		<< "\n"
1543 		<< "layout (location=0) out perprimitiveNV vec4 primitiveColor[];\n"
1544 		<< "\n"
1545 		<< "${GLOBALS:opt}"
1546 		<< "void main ()\n"
1547 		<< "{\n"
1548 		<< "${BODY}"
1549 		<< "}\n"
1550 		;
1551 	const tcu::StringTemplate meshTemplate(meshTemplateStr.str());
1552 
1553 	if (m_params->needsTaskShader())
1554 	{
1555 		std::ostringstream task;
1556 		task
1557 			<< "#version 450\n"
1558 			<< "#extension GL_NV_mesh_shader : enable\n"
1559 			<< "\n"
1560 			<< "layout (local_size_x=" << kLocalInvocations << ") in;\n"
1561 			<< "\n"
1562 			<< sharedDecl
1563 			<< "void main ()\n"
1564 			<< "{\n"
1565 			<< verification.str()
1566 			<< "}\n"
1567 			;
1568 
1569 		std::map<std::string, std::string> replacements;
1570 		replacements["LOCAL_SIZE"]	= "1";
1571 		replacements["BODY"]		= meshPrimStr;
1572 
1573 		const auto meshStr = meshTemplate.specialize(replacements);
1574 
1575 		programCollection.glslSources.add("task") << glu::TaskSource(task.str());
1576 		programCollection.glslSources.add("mesh") << glu::MeshSource(meshStr);
1577 	}
1578 	else
1579 	{
1580 		std::map<std::string, std::string> replacements;
1581 		replacements["LOCAL_SIZE"]	= std::to_string(kLocalInvocations);
1582 		replacements["BODY"]		= verification.str();
1583 		replacements["GLOBALS"]		= sharedDecl;
1584 
1585 		const auto meshStr = meshTemplate.specialize(replacements);
1586 
1587 		programCollection.glslSources.add("mesh") << glu::MeshSource(meshStr);
1588 	}
1589 }
1590 
// Memory barrier case: selects which GLSL function is tested, memoryBarrierShared() or groupMemoryBarrier().
enum class MemoryBarrierType
{
	SHARED = 0,
	GROUP,
};
1593 
1594 struct MemoryBarrierParams : public MiscTestParams
1595 {
MemoryBarrierParamsvkt::MeshShader::__anond9d5c6950111::MemoryBarrierParams1596 	MemoryBarrierParams (const tcu::Maybe<uint32_t>& taskCount_, uint32_t meshCount_, uint32_t width_, uint32_t height_, MemoryBarrierType memBarrierType_)
1597 		: MiscTestParams	(taskCount_, meshCount_, width_, height_)
1598 		, memBarrierType	(memBarrierType_)
1599 	{}
1600 
1601 	MemoryBarrierType memBarrierType;
1602 
glslFuncvkt::MeshShader::__anond9d5c6950111::MemoryBarrierParams1603 	std::string glslFunc () const
1604 	{
1605 		std::string funcName;
1606 
1607 		switch (memBarrierType)
1608 		{
1609 		case MemoryBarrierType::SHARED:		funcName = "memoryBarrierShared";	break;
1610 		case MemoryBarrierType::GROUP:		funcName = "groupMemoryBarrier";	break;
1611 		default: DE_ASSERT(false); break;
1612 		}
1613 
1614 		return funcName;
1615 	}
1616 
1617 };
1618 
1619 class MemoryBarrierCase : public MeshShaderMiscCase
1620 {
1621 public:
MemoryBarrierCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)1622 					MemoryBarrierCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
1623 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
1624 					{}
1625 
1626 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
1627 	TestInstance*	createInstance			(Context& context) const override;
1628 
1629 	static constexpr uint32_t kLocalInvocations = 2u;
1630 };
1631 
1632 class MemoryBarrierInstance : public MeshShaderMiscInstance
1633 {
1634 public:
MemoryBarrierInstance(Context & context,const MiscTestParams * params)1635 	MemoryBarrierInstance (Context& context, const MiscTestParams* params)
1636 		: MeshShaderMiscInstance (context, params)
1637 	{}
1638 
1639 	void	generateReferenceLevel	() override;
1640 	bool	verifyResult			(const tcu::ConstPixelBufferAccess& resultAccess) const override;
1641 
1642 protected:
1643 	// Allow two possible outcomes.
1644 	std::unique_ptr<tcu::TextureLevel>	m_referenceLevel2;
1645 };
1646 
createInstance(Context & context) const1647 TestInstance* MemoryBarrierCase::createInstance (Context& context) const
1648 {
1649 	return new MemoryBarrierInstance(context, m_params.get());
1650 }
1651 
generateReferenceLevel()1652 void MemoryBarrierInstance::generateReferenceLevel ()
1653 {
1654 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
1655 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f), m_referenceLevel2);
1656 }
1657 
verifyResult(const tcu::ConstPixelBufferAccess & resultAccess) const1658 bool MemoryBarrierInstance::verifyResult (const tcu::ConstPixelBufferAccess& resultAccess) const
1659 {
1660 	// Any of the two results is considered valid.
1661 	// Clarify what we are checking in the logs; otherwise, they could be confusing.
1662 	auto& log = m_context.getTestContext().getLog();
1663 	const std::vector<tcu::TextureLevel*> levels = { m_referenceLevel.get(), m_referenceLevel2.get() };
1664 
1665 	bool good = false;
1666 	for (size_t i = 0; i < levels.size(); ++i)
1667 	{
1668 		log << tcu::TestLog::Message << "Comparing result with reference " << i << "..." << tcu::TestLog::EndMessage;
1669 		const auto success = MeshShaderMiscInstance::verifyResult(resultAccess, *levels[i]);
1670 		if (success)
1671 		{
1672 			log << tcu::TestLog::Message << "Match! The test has passed" << tcu::TestLog::EndMessage;
1673 			good = true;
1674 			break;
1675 		}
1676 	}
1677 
1678 	return good;
1679 }
1680 
initPrograms(vk::SourceCollections & programCollection) const1681 void MemoryBarrierCase::initPrograms (vk::SourceCollections& programCollection) const
1682 {
1683 	const auto params = dynamic_cast<MemoryBarrierParams*>(m_params.get());
1684 	DE_ASSERT(params);
1685 
1686 	// Generate frag shader.
1687 	MeshShaderMiscCase::initPrograms(programCollection);
1688 
1689 	DE_ASSERT(params->meshCount == 1u);
1690 	DE_ASSERT(params->width == 1u && params->height == 1u);
1691 
1692 	const bool taskShader = params->needsTaskShader();
1693 
1694 	const std::string	taskDataDecl	= "taskNV TaskData { float blue; } td;\n\n";
1695 	const std::string	inTaskData		= "in " + taskDataDecl;
1696 	const std::string	outTaskData		= "out " + taskDataDecl;
1697 	const auto			barrierFunc		= params->glslFunc();
1698 
1699 	std::ostringstream meshPrimData;
1700 	meshPrimData
1701 			<< "gl_PrimitiveCountNV = 1u;\n"
1702 			<< "gl_MeshVerticesNV[0].gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
1703 			<< "gl_MeshVerticesNV[0].gl_PointSize = 1.0;\n"
1704 			<< "primitiveColor[0] = vec4(0.0, 0.0, " << (taskShader ? "td.blue" : "float(iterations % 2u)") << ", 1.0);\n"
1705 			<< "gl_PrimitiveIndicesNV[0] = 0;\n"
1706 			;
1707 	const std::string meshPrimStr	= meshPrimData.str();
1708 
1709 	const std::string taskAction	= "gl_TaskCountNV = 1u;\ntd.blue = float(iterations % 2u);\n";
1710 	const std::string meshAction	= meshPrimStr;
1711 	const std::string action		= (taskShader ? taskAction : meshAction);
1712 
1713 	const std::string	sharedDecl = "shared uint flags[2];\n\n";
1714 	std::ostringstream	verification;
1715 	verification
1716 		<< "flags[gl_LocalInvocationID.x] = 0u;\n"
1717 		<< "barrier();\n"
1718 		<< "flags[gl_LocalInvocationID.x] = 1u;\n"
1719 		<<  barrierFunc << "();\n"
1720 		<< "uint otherInvocation = 1u - gl_LocalInvocationID.x;\n"
1721 		<< "uint iterations = 0u;\n"
1722 		<< "while (flags[otherInvocation] != 1u) {\n"
1723 		<< "    iterations++;\n"
1724 		<< "}\n"
1725 		<< "if (gl_LocalInvocationID.x == 0u) {\n"
1726 		<< "\n"
1727 		<< action
1728 		<< "\n"
1729 		<< "}\n"
1730 		;
1731 
1732 	// The mesh shader is very similar in both cases, so we use a template.
1733 	std::ostringstream meshTemplateStr;
1734 	meshTemplateStr
1735 		<< "#version 450\n"
1736 		<< "#extension GL_NV_mesh_shader : enable\n"
1737 		<< "\n"
1738 		<< "layout (local_size_x=${LOCAL_SIZE}) in;\n"
1739 		<< "layout (points) out;\n"
1740 		<< "layout (max_vertices=1, max_primitives=1) out;\n"
1741 		<< "\n"
1742 		<< "layout (location=0) out perprimitiveNV vec4 primitiveColor[];\n"
1743 		<< "\n"
1744 		<< "${GLOBALS}"
1745 		<< "void main ()\n"
1746 		<< "{\n"
1747 		<< "${BODY}"
1748 		<< "}\n"
1749 		;
1750 	const tcu::StringTemplate meshTemplate(meshTemplateStr.str());
1751 
1752 	if (params->needsTaskShader())
1753 	{
1754 		std::ostringstream task;
1755 		task
1756 			<< "#version 450\n"
1757 			<< "#extension GL_NV_mesh_shader : enable\n"
1758 			<< "\n"
1759 			<< "layout (local_size_x=" << kLocalInvocations << ") in;\n"
1760 			<< "\n"
1761 			<< sharedDecl
1762 			<< outTaskData
1763 			<< "void main ()\n"
1764 			<< "{\n"
1765 			<< verification.str()
1766 			<< "}\n"
1767 			;
1768 
1769 		std::map<std::string, std::string> replacements;
1770 		replacements["LOCAL_SIZE"]	= "1";
1771 		replacements["BODY"]		= meshPrimStr;
1772 		replacements["GLOBALS"]		= inTaskData;
1773 
1774 		const auto meshStr = meshTemplate.specialize(replacements);
1775 
1776 		programCollection.glslSources.add("task") << glu::TaskSource(task.str());
1777 		programCollection.glslSources.add("mesh") << glu::MeshSource(meshStr);
1778 	}
1779 	else
1780 	{
1781 		std::map<std::string, std::string> replacements;
1782 		replacements["LOCAL_SIZE"]	= std::to_string(kLocalInvocations);
1783 		replacements["BODY"]		= verification.str();
1784 		replacements["GLOBALS"]		= sharedDecl;
1785 
1786 		const auto meshStr = meshTemplate.specialize(replacements);
1787 
1788 		programCollection.glslSources.add("mesh") << glu::MeshSource(meshStr);
1789 	}
1790 }
1791 
1792 class CustomAttributesCase : public MeshShaderMiscCase
1793 {
1794 public:
CustomAttributesCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)1795 					CustomAttributesCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
1796 						: MeshShaderMiscCase(testCtx, name, description, std::move(params)) {}
~CustomAttributesCase(void)1797 	virtual			~CustomAttributesCase		(void) {}
1798 
1799 	TestInstance*	createInstance				(Context& context) const override;
1800 	void			checkSupport				(Context& context) const override;
1801 	void			initPrograms				(vk::SourceCollections& programCollection) const override;
1802 };
1803 
1804 class CustomAttributesInstance : public MeshShaderMiscInstance
1805 {
1806 public:
CustomAttributesInstance(Context & context,const MiscTestParams * params)1807 						CustomAttributesInstance	(Context& context, const MiscTestParams* params)
1808 							: MeshShaderMiscInstance(context, params) {}
~CustomAttributesInstance(void)1809 	virtual				~CustomAttributesInstance	(void) {}
1810 
1811 	void				generateReferenceLevel		() override;
1812 	tcu::TestStatus		iterate						(void) override;
1813 };
1814 
createInstance(Context & context) const1815 TestInstance* CustomAttributesCase::createInstance (Context& context) const
1816 {
1817 	return new CustomAttributesInstance(context, m_params.get());
1818 }
1819 
checkSupport(Context & context) const1820 void CustomAttributesCase::checkSupport (Context& context) const
1821 {
1822 	MeshShaderMiscCase::checkSupport(context);
1823 
1824 	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_MULTI_VIEWPORT);
1825 	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_CLIP_DISTANCE);
1826 }
1827 
initPrograms(vk::SourceCollections & programCollection) const1828 void CustomAttributesCase::initPrograms (vk::SourceCollections& programCollection) const
1829 {
1830 	std::ostringstream frag;
1831 	frag
1832 		<< "#version 450\n"
1833 		<< "#extension GL_NV_mesh_shader : enable\n"
1834 		<< "\n"
1835 		<< "layout (location=0) in vec4 customAttribute1;\n"
1836 		<< "layout (location=1) in flat float customAttribute2;\n"
1837 		<< "layout (location=2) in flat int customAttribute3;\n"
1838 		<< "\n"
1839 		<< "layout (location=3) in perprimitiveNV flat uvec4 customAttribute4;\n"
1840 		<< "layout (location=4) in perprimitiveNV float customAttribute5;\n"
1841 		<< "\n"
1842 		<< "layout (location=0) out vec4 outColor;\n"
1843 		<< "\n"
1844 		<< "void main ()\n"
1845 		<< "{\n"
1846 		<< "    bool goodPrimitiveID = (gl_PrimitiveID == 1000 || gl_PrimitiveID == 1001);\n"
1847 		<< "    bool goodViewportIndex = (gl_ViewportIndex == 1);\n"
1848 		<< "    bool goodCustom1 = (customAttribute1.x >= 0.25 && customAttribute1.x <= 0.5 &&\n"
1849 		<< "                        customAttribute1.y >= 0.5  && customAttribute1.y <= 1.0 &&\n"
1850 		<< "                        customAttribute1.z >= 10.0 && customAttribute1.z <= 20.0 &&\n"
1851 		<< "                        customAttribute1.w == 3.0);\n"
1852 		<< "    bool goodCustom2 = (customAttribute2 == 1.0 || customAttribute2 == 2.0);\n"
1853 		<< "    bool goodCustom3 = (customAttribute3 == 3 || customAttribute3 == 4);\n"
1854 		<< "    bool goodCustom4 = ((gl_PrimitiveID == 1000 && customAttribute4 == uvec4(100, 101, 102, 103)) ||\n"
1855 		<< "                        (gl_PrimitiveID == 1001 && customAttribute4 == uvec4(200, 201, 202, 203)));\n"
1856 		<< "    bool goodCustom5 = ((gl_PrimitiveID == 1000 && customAttribute5 == 6.0) ||\n"
1857 		<< "                        (gl_PrimitiveID == 1001 && customAttribute5 == 7.0));\n"
1858 		<< "    \n"
1859 		<< "    if (goodPrimitiveID && goodViewportIndex && goodCustom1 && goodCustom2 && goodCustom3 && goodCustom4 && goodCustom5) {\n"
1860 		<< "        outColor = vec4(0.0, 0.0, 1.0, 1.0);\n"
1861 		<< "    } else {\n"
1862 		<< "        outColor = vec4(0.0, 0.0, 0.0, 1.0);\n"
1863 		<< "    }\n"
1864 		<< "}\n"
1865 		;
1866 	programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
1867 
1868 	std::ostringstream pvdDataDeclStream;
1869 	pvdDataDeclStream
1870 		<< "    vec4 positions[4];\n"
1871 		<< "    float pointSizes[4];\n"
1872 		<< "    float clipDistances[4];\n"
1873 		<< "    vec4 custom1[4];\n"
1874 		<< "    float custom2[4];\n"
1875 		<< "    int custom3[4];\n"
1876 		;
1877 	const auto pvdDataDecl = pvdDataDeclStream.str();
1878 
1879 	std::ostringstream ppdDataDeclStream;
1880 	ppdDataDeclStream
1881 		<< "    int primitiveIds[2];\n"
1882 		<< "    int viewportIndices[2];\n"
1883 		<< "    uvec4 custom4[2];\n"
1884 		<< "    float custom5[2];\n"
1885 		;
1886 	const auto ppdDataDecl = ppdDataDeclStream.str();
1887 
1888 	std::ostringstream bindingsDeclStream;
1889 	bindingsDeclStream
1890 		<< "layout (set=0, binding=0, std430) buffer PerVertexData {\n"
1891 		<< pvdDataDecl
1892 		<< "} pvd;\n"
1893 		<< "layout (set=0, binding=1) uniform PerPrimitiveData {\n"
1894 		<< ppdDataDecl
1895 		<< "} ppd;\n"
1896 		<< "\n"
1897 		;
1898 	const auto bindingsDecl = bindingsDeclStream.str();
1899 
1900 	std::ostringstream taskDataStream;
1901 	taskDataStream
1902 		<< "taskNV TaskData {\n"
1903 		<< pvdDataDecl
1904 		<< ppdDataDecl
1905 		<< "} td;\n"
1906 		<< "\n"
1907 		;
1908 	const auto taskDataDecl = taskDataStream.str();
1909 
1910 	const auto taskShader = m_params->needsTaskShader();
1911 
1912 	const auto meshPvdPrefix = (taskShader ? "td" : "pvd");
1913 	const auto meshPpdPrefix = (taskShader ? "td" : "ppd");
1914 
1915 	std::ostringstream mesh;
1916 	mesh
1917 		<< "#version 450\n"
1918 		<< "#extension GL_NV_mesh_shader : enable\n"
1919 		<< "\n"
1920 		<< "layout (local_size_x=1) in;\n"
1921 		<< "layout (max_primitives=2, max_vertices=4) out;\n"
1922 		<< "layout (triangles) out;\n"
1923 		<< "\n"
1924 		<< "out gl_MeshPerVertexNV {\n"
1925 		<< "    vec4  gl_Position;\n"
1926 		<< "    float gl_PointSize;\n"
1927 		<< "    float gl_ClipDistance[1];\n"
1928 		<< "} gl_MeshVerticesNV[];\n"
1929 		<< "\n"
1930 		<< "layout (location=0) out vec4 customAttribute1[];\n"
1931 		<< "layout (location=1) out flat float customAttribute2[];\n"
1932 		<< "layout (location=2) out int customAttribute3[];\n"
1933 		<< "\n"
1934 		<< "layout (location=3) out perprimitiveNV uvec4 customAttribute4[];\n"
1935 		<< "layout (location=4) out perprimitiveNV float customAttribute5[];\n"
1936 		<< "\n"
1937 		<< "out perprimitiveNV gl_MeshPerPrimitiveNV {\n"
1938 		<< "  int gl_PrimitiveID;\n"
1939 		<< "  int gl_ViewportIndex;\n"
1940 		<< "} gl_MeshPrimitivesNV[];\n"
1941 		<< "\n"
1942 		<< (taskShader ? "in " + taskDataDecl : bindingsDecl)
1943 		<< "void main ()\n"
1944 		<< "{\n"
1945 		<< "    gl_PrimitiveCountNV = 2u;\n"
1946 		<< "\n"
1947 		<< "    gl_MeshVerticesNV[0].gl_Position = " << meshPvdPrefix << ".positions[0]; //vec4(-1.0, -1.0, 0.0, 1.0)\n"
1948 		<< "    gl_MeshVerticesNV[1].gl_Position = " << meshPvdPrefix << ".positions[1]; //vec4( 1.0, -1.0, 0.0, 1.0)\n"
1949 		<< "    gl_MeshVerticesNV[2].gl_Position = " << meshPvdPrefix << ".positions[2]; //vec4(-1.0,  1.0, 0.0, 1.0)\n"
1950 		<< "    gl_MeshVerticesNV[3].gl_Position = " << meshPvdPrefix << ".positions[3]; //vec4( 1.0,  1.0, 0.0, 1.0)\n"
1951 		<< "\n"
1952 		<< "    gl_MeshVerticesNV[0].gl_PointSize = " << meshPvdPrefix << ".pointSizes[0]; //1.0\n"
1953 		<< "    gl_MeshVerticesNV[1].gl_PointSize = " << meshPvdPrefix << ".pointSizes[1]; //1.0\n"
1954 		<< "    gl_MeshVerticesNV[2].gl_PointSize = " << meshPvdPrefix << ".pointSizes[2]; //1.0\n"
1955 		<< "    gl_MeshVerticesNV[3].gl_PointSize = " << meshPvdPrefix << ".pointSizes[3]; //1.0\n"
1956 		<< "\n"
1957 		<< "    // Remove geometry on the right side.\n"
1958 		<< "    gl_MeshVerticesNV[0].gl_ClipDistance[0] = " << meshPvdPrefix << ".clipDistances[0]; // 1.0\n"
1959 		<< "    gl_MeshVerticesNV[1].gl_ClipDistance[0] = " << meshPvdPrefix << ".clipDistances[1]; //-1.0\n"
1960 		<< "    gl_MeshVerticesNV[2].gl_ClipDistance[0] = " << meshPvdPrefix << ".clipDistances[2]; // 1.0\n"
1961 		<< "    gl_MeshVerticesNV[3].gl_ClipDistance[0] = " << meshPvdPrefix << ".clipDistances[3]; //-1.0\n"
1962 		<< "    \n"
1963 		<< "    gl_PrimitiveIndicesNV[0] = 0;\n"
1964 		<< "    gl_PrimitiveIndicesNV[1] = 2;\n"
1965 		<< "    gl_PrimitiveIndicesNV[2] = 1;\n"
1966 		<< "\n"
1967 		<< "    gl_PrimitiveIndicesNV[3] = 2;\n"
1968 		<< "    gl_PrimitiveIndicesNV[4] = 3;\n"
1969 		<< "    gl_PrimitiveIndicesNV[5] = 1;\n"
1970 		<< "\n"
1971 		<< "    gl_MeshPrimitivesNV[0].gl_PrimitiveID = " << meshPpdPrefix << ".primitiveIds[0]; //1000\n"
1972 		<< "    gl_MeshPrimitivesNV[1].gl_PrimitiveID = " << meshPpdPrefix << ".primitiveIds[1]; //1001\n"
1973 		<< "\n"
1974 		<< "    gl_MeshPrimitivesNV[0].gl_ViewportIndex = " << meshPpdPrefix << ".viewportIndices[0]; //1\n"
1975 		<< "    gl_MeshPrimitivesNV[1].gl_ViewportIndex = " << meshPpdPrefix << ".viewportIndices[1]; //1\n"
1976 		<< "\n"
1977 		<< "    // Custom per-vertex attributes\n"
1978 		<< "    customAttribute1[0] = " << meshPvdPrefix << ".custom1[0]; //vec4(0.25, 0.5, 10.0, 3.0)\n"
1979 		<< "    customAttribute1[1] = " << meshPvdPrefix << ".custom1[1]; //vec4(0.25, 1.0, 20.0, 3.0)\n"
1980 		<< "    customAttribute1[2] = " << meshPvdPrefix << ".custom1[2]; //vec4( 0.5, 0.5, 20.0, 3.0)\n"
1981 		<< "    customAttribute1[3] = " << meshPvdPrefix << ".custom1[3]; //vec4( 0.5, 1.0, 10.0, 3.0)\n"
1982 		<< "\n"
1983 		<< "    customAttribute2[0] = " << meshPvdPrefix << ".custom2[0]; //1.0f\n"
1984 		<< "    customAttribute2[1] = " << meshPvdPrefix << ".custom2[1]; //1.0f\n"
1985 		<< "    customAttribute2[2] = " << meshPvdPrefix << ".custom2[2]; //2.0f\n"
1986 		<< "    customAttribute2[3] = " << meshPvdPrefix << ".custom2[3]; //2.0f\n"
1987 		<< "\n"
1988 		<< "    customAttribute3[0] = " << meshPvdPrefix << ".custom3[0]; //3\n"
1989 		<< "    customAttribute3[1] = " << meshPvdPrefix << ".custom3[1]; //3\n"
1990 		<< "    customAttribute3[2] = " << meshPvdPrefix << ".custom3[2]; //4\n"
1991 		<< "    customAttribute3[3] = " << meshPvdPrefix << ".custom3[3]; //4\n"
1992 		<< "\n"
1993 		<< "    // Custom per-primitive attributes.\n"
1994 		<< "    customAttribute4[0] = " << meshPpdPrefix << ".custom4[0]; //uvec4(100, 101, 102, 103)\n"
1995 		<< "    customAttribute4[1] = " << meshPpdPrefix << ".custom4[1]; //uvec4(200, 201, 202, 203)\n"
1996 		<< "\n"
1997 		<< "    customAttribute5[0] = " << meshPpdPrefix << ".custom5[0]; //6.0\n"
1998 		<< "    customAttribute5[1] = " << meshPpdPrefix << ".custom5[1]; //7.0\n"
1999 		<< "}\n"
2000 		;
2001 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
2002 
2003 	if (taskShader)
2004 	{
2005 		std::ostringstream task;
2006 		task
2007 			<< "#version 450\n"
2008 			<< "#extension GL_NV_mesh_shader : enable\n"
2009 			<< "\n"
2010 			<< "out " << taskDataDecl
2011 			<< bindingsDecl
2012 			<< "void main ()\n"
2013 			<< "{\n"
2014 			<< "    gl_TaskCountNV = " << m_params->meshCount << ";\n"
2015 			<< "\n"
2016 			<< "    td.positions[0] = pvd.positions[0];\n"
2017 			<< "    td.positions[1] = pvd.positions[1];\n"
2018 			<< "    td.positions[2] = pvd.positions[2];\n"
2019 			<< "    td.positions[3] = pvd.positions[3];\n"
2020 			<< "\n"
2021 			<< "    td.pointSizes[0] = pvd.pointSizes[0];\n"
2022 			<< "    td.pointSizes[1] = pvd.pointSizes[1];\n"
2023 			<< "    td.pointSizes[2] = pvd.pointSizes[2];\n"
2024 			<< "    td.pointSizes[3] = pvd.pointSizes[3];\n"
2025 			<< "\n"
2026 			<< "    td.clipDistances[0] = pvd.clipDistances[0];\n"
2027 			<< "    td.clipDistances[1] = pvd.clipDistances[1];\n"
2028 			<< "    td.clipDistances[2] = pvd.clipDistances[2];\n"
2029 			<< "    td.clipDistances[3] = pvd.clipDistances[3];\n"
2030 			<< "\n"
2031 			<< "    td.custom1[0] = pvd.custom1[0];\n"
2032 			<< "    td.custom1[1] = pvd.custom1[1];\n"
2033 			<< "    td.custom1[2] = pvd.custom1[2];\n"
2034 			<< "    td.custom1[3] = pvd.custom1[3];\n"
2035 			<< "\n"
2036 			<< "    td.custom2[0] = pvd.custom2[0];\n"
2037 			<< "    td.custom2[1] = pvd.custom2[1];\n"
2038 			<< "    td.custom2[2] = pvd.custom2[2];\n"
2039 			<< "    td.custom2[3] = pvd.custom2[3];\n"
2040 			<< "\n"
2041 			<< "    td.custom3[0] = pvd.custom3[0];\n"
2042 			<< "    td.custom3[1] = pvd.custom3[1];\n"
2043 			<< "    td.custom3[2] = pvd.custom3[2];\n"
2044 			<< "    td.custom3[3] = pvd.custom3[3];\n"
2045 			<< "\n"
2046 			<< "    td.primitiveIds[0] = ppd.primitiveIds[0];\n"
2047 			<< "    td.primitiveIds[1] = ppd.primitiveIds[1];\n"
2048 			<< "\n"
2049 			<< "    td.viewportIndices[0] = ppd.viewportIndices[0];\n"
2050 			<< "    td.viewportIndices[1] = ppd.viewportIndices[1];\n"
2051 			<< "\n"
2052 			<< "    td.custom4[0] = ppd.custom4[0];\n"
2053 			<< "    td.custom4[1] = ppd.custom4[1];\n"
2054 			<< "\n"
2055 			<< "    td.custom5[0] = ppd.custom5[0];\n"
2056 			<< "    td.custom5[1] = ppd.custom5[1];\n"
2057 			<< "}\n"
2058 			;
2059 		programCollection.glslSources.add("task") << glu::TaskSource(task.str());
2060 	}
2061 }
2062 
generateReferenceLevel()2063 void CustomAttributesInstance::generateReferenceLevel ()
2064 {
2065 	const auto format		= getOutputFormat();
2066 	const auto tcuFormat	= mapVkFormat(format);
2067 
2068 	const auto iWidth		= static_cast<int>(m_params->width);
2069 	const auto iHeight		= static_cast<int>(m_params->height);
2070 
2071 	const auto halfWidth	= iWidth / 2;
2072 	const auto halfHeight	= iHeight / 2;
2073 
2074 	m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
2075 
2076 	const auto access		= m_referenceLevel->getAccess();
2077 	const auto clearColor	= tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f);
2078 	const auto blueColor	= tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f);
2079 
2080 	tcu::clear(access, clearColor);
2081 
2082 	// Fill the top left quarter.
2083 	for (int y = 0; y < halfWidth; ++y)
2084 	for (int x = 0; x < halfHeight; ++x)
2085 	{
2086 		access.setPixel(blueColor, x, y);
2087 	}
2088 }
2089 
iterate()2090 tcu::TestStatus CustomAttributesInstance::iterate ()
2091 {
2092 	struct PerVertexData
2093 	{
2094 		tcu::Vec4	positions[4];
2095 		float		pointSizes[4];
2096 		float		clipDistances[4];
2097 		tcu::Vec4	custom1[4];
2098 		float		custom2[4];
2099 		int32_t		custom3[4];
2100 	};
2101 
2102 	struct PerPrimitiveData
2103 	{
2104 		// Note some of these are declared as vectors to match the std140 layout.
2105 		tcu::IVec4	primitiveIds[2];
2106 		tcu::IVec4	viewportIndices[2];
2107 		tcu::UVec4	custom4[2];
2108 		tcu::Vec4	custom5[2];
2109 	};
2110 
2111 	const auto&		vkd			= m_context.getDeviceInterface();
2112 	const auto		device		= m_context.getDevice();
2113 	auto&			alloc		= m_context.getDefaultAllocator();
2114 	const auto		queueIndex	= m_context.getUniversalQueueFamilyIndex();
2115 	const auto		queue		= m_context.getUniversalQueue();
2116 
2117 	const auto		imageFormat	= getOutputFormat();
2118 	const auto		tcuFormat	= mapVkFormat(imageFormat);
2119 	const auto		imageExtent	= makeExtent3D(m_params->width, m_params->height, 1u);
2120 	const auto		imageUsage	= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2121 
2122 	const auto&		binaries	= m_context.getBinaryCollection();
2123 	const auto		hasTask		= binaries.contains("task");
2124 	const auto		bufStages	= (hasTask ? VK_SHADER_STAGE_TASK_BIT_NV : VK_SHADER_STAGE_MESH_BIT_NV);
2125 
2126 	const VkImageCreateInfo colorBufferInfo =
2127 	{
2128 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
2129 		nullptr,								//	const void*				pNext;
2130 		0u,										//	VkImageCreateFlags		flags;
2131 		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
2132 		imageFormat,							//	VkFormat				format;
2133 		imageExtent,							//	VkExtent3D				extent;
2134 		1u,										//	uint32_t				mipLevels;
2135 		1u,										//	uint32_t				arrayLayers;
2136 		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
2137 		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
2138 		imageUsage,								//	VkImageUsageFlags		usage;
2139 		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
2140 		0u,										//	uint32_t				queueFamilyIndexCount;
2141 		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
2142 		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
2143 	};
2144 
2145 	// Create color image and view.
2146 	ImageWithMemory	colorImage	(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
2147 	const auto		colorSRR	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2148 	const auto		colorSRL	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
2149 	const auto		colorView	= makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
2150 
2151 	// Create a memory buffer for verification.
2152 	const auto			verificationBufferSize	= static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
2153 	const auto			verificationBufferUsage	= (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2154 	const auto			verificationBufferInfo	= makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
2155 
2156 	BufferWithMemory	verificationBuffer		(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
2157 	auto&				verificationBufferAlloc	= verificationBuffer.getAllocation();
2158 	void*				verificationBufferData	= verificationBufferAlloc.getHostPtr();
2159 
2160 	// This needs to match what the fragment shader will expect.
2161 	const PerVertexData perVertexData =
2162 	{
2163 		//	tcu::Vec4	positions[4];
2164 		{
2165 			tcu::Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
2166 			tcu::Vec4( 1.0f, -1.0f, 0.0f, 1.0f),
2167 			tcu::Vec4(-1.0f,  1.0f, 0.0f, 1.0f),
2168 			tcu::Vec4( 1.0f,  1.0f, 0.0f, 1.0f),
2169 		},
2170 		//	float		pointSizes[4];
2171 		{ 1.0f, 1.0f, 1.0f, 1.0f, },
2172 		//	float		clipDistances[4];
2173 		{
2174 			1.0f,
2175 			-1.0f,
2176 			1.0f,
2177 			-1.0f,
2178 		},
2179 		//	tcu::Vec4	custom1[4];
2180 		{
2181 			tcu::Vec4(0.25, 0.5, 10.0, 3.0),
2182 			tcu::Vec4(0.25, 1.0, 20.0, 3.0),
2183 			tcu::Vec4( 0.5, 0.5, 20.0, 3.0),
2184 			tcu::Vec4( 0.5, 1.0, 10.0, 3.0),
2185 		},
2186 		//	float		custom2[4];
2187 		{ 1.0f, 1.0f, 2.0f, 2.0f, },
2188 		//	int32_t		custom3[4];
2189 		{ 3, 3, 4, 4 },
2190 	};
2191 
2192 	// This needs to match what the fragment shader will expect. Reminder: some of these are declared as gvec4 to match the std140
2193 	// layout, but only the first component is actually used.
2194 	const PerPrimitiveData perPrimitiveData =
2195 	{
2196 		//	int			primitiveIds[2];
2197 		{
2198 			tcu::IVec4(1000, 0, 0, 0),
2199 			tcu::IVec4(1001, 0, 0, 0),
2200 		},
2201 		//	int			viewportIndices[2];
2202 		{
2203 			tcu::IVec4(1, 0, 0, 0),
2204 			tcu::IVec4(1, 0, 0, 0),
2205 		},
2206 		//	uvec4		custom4[2];
2207 		{
2208 			tcu::UVec4(100u, 101u, 102u, 103u),
2209 			tcu::UVec4(200u, 201u, 202u, 203u),
2210 		},
2211 		//	float		custom5[2];
2212 		{
2213 			tcu::Vec4(6.0f, 0.0f, 0.0f, 0.0f),
2214 			tcu::Vec4(7.0f, 0.0f, 0.0f, 0.0f),
2215 		},
2216 	};
2217 
2218 	// Create and fill buffers with this data.
2219 	const auto			pvdSize		= static_cast<VkDeviceSize>(sizeof(perVertexData));
2220 	const auto			pvdInfo		= makeBufferCreateInfo(pvdSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
2221 	BufferWithMemory	pvdData		(vkd, device, alloc, pvdInfo, MemoryRequirement::HostVisible);
2222 	auto&				pvdAlloc	= pvdData.getAllocation();
2223 	void*				pvdPtr		= pvdAlloc.getHostPtr();
2224 
2225 	const auto			ppdSize		= static_cast<VkDeviceSize>(sizeof(perPrimitiveData));
2226 	const auto			ppdInfo		= makeBufferCreateInfo(ppdSize, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
2227 	BufferWithMemory	ppdData		(vkd, device, alloc, ppdInfo, MemoryRequirement::HostVisible);
2228 	auto&				ppdAlloc	= ppdData.getAllocation();
2229 	void*				ppdPtr		= ppdAlloc.getHostPtr();
2230 
2231 	deMemcpy(pvdPtr, &perVertexData, sizeof(perVertexData));
2232 	deMemcpy(ppdPtr, &perPrimitiveData, sizeof(perPrimitiveData));
2233 
2234 	flushAlloc(vkd, device, pvdAlloc);
2235 	flushAlloc(vkd, device, ppdAlloc);
2236 
2237 	// Descriptor set layout.
2238 	DescriptorSetLayoutBuilder setLayoutBuilder;
2239 	setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, bufStages);
2240 	setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, bufStages);
2241 	const auto setLayout = setLayoutBuilder.build(vkd, device);
2242 
2243 	// Create and update descriptor set.
2244 	DescriptorPoolBuilder descriptorPoolBuilder;
2245 	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2246 	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
2247 	const auto descriptorPool	= descriptorPoolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2248 	const auto descriptorSet	= makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
2249 
2250 	DescriptorSetUpdateBuilder updateBuilder;
2251 	const auto storageBufferInfo = makeDescriptorBufferInfo(pvdData.get(), 0ull, pvdSize);
2252 	const auto uniformBufferInfo = makeDescriptorBufferInfo(ppdData.get(), 0ull, ppdSize);
2253 	updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &storageBufferInfo);
2254 	updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferInfo);
2255 	updateBuilder.update(vkd, device);
2256 
2257 	// Pipeline layout.
2258 	const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get());
2259 
2260 	// Shader modules.
2261 	const auto	meshShader	= createShaderModule(vkd, device, binaries.get("mesh"));
2262 	const auto	fragShader	= createShaderModule(vkd, device, binaries.get("frag"));
2263 
2264 	Move<VkShaderModule> taskShader;
2265 	if (hasTask)
2266 		taskShader = createShaderModule(vkd, device, binaries.get("task"));
2267 
2268 	// Render pass.
2269 	const auto renderPass = makeRenderPass(vkd, device, imageFormat);
2270 
2271 	// Framebuffer.
2272 	const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);
2273 
2274 	// Viewport and scissor.
2275 	const auto						topHalf		= makeViewport(imageExtent.width, imageExtent.height / 2u);
2276 	const std::vector<VkViewport>	viewports	{ makeViewport(imageExtent), topHalf };
2277 	const std::vector<VkRect2D>		scissors	(2u, makeRect2D(imageExtent));
2278 
2279 	const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
2280 		taskShader.get(), meshShader.get(), fragShader.get(),
2281 		renderPass.get(), viewports, scissors);
2282 
2283 	// Command pool and buffer.
2284 	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
2285 	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2286 	const auto cmdBuffer	= cmdBufferPtr.get();
2287 
2288 	beginCommandBuffer(vkd, cmdBuffer);
2289 
2290 	// Run pipeline.
2291 	const tcu::Vec4 clearColor (0.0f, 0.0f, 0.0f, 0.0f);
2292 	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
2293 	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
2294 	vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
2295 	vkd.cmdDrawMeshTasksNV(cmdBuffer, m_params->drawCount(), 0u);
2296 	endRenderPass(vkd, cmdBuffer);
2297 
2298 	// Copy color buffer to verification buffer.
2299 	const auto colorAccess		= (VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT);
2300 	const auto transferRead		= VK_ACCESS_TRANSFER_READ_BIT;
2301 	const auto transferWrite	= VK_ACCESS_TRANSFER_WRITE_BIT;
2302 	const auto hostRead			= VK_ACCESS_HOST_READ_BIT;
2303 
2304 	const auto preCopyBarrier	= makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
2305 	const auto postCopyBarrier	= makeMemoryBarrier(transferWrite, hostRead);
2306 	const auto copyRegion		= makeBufferImageCopy(imageExtent, colorSRL);
2307 
2308 	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
2309 	vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, verificationBuffer.get(), 1u, &copyRegion);
2310 	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postCopyBarrier, 0u, nullptr, 0u, nullptr);
2311 
2312 	endCommandBuffer(vkd, cmdBuffer);
2313 	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
2314 
2315 	// Generate reference image and compare results.
2316 	const tcu::IVec3					iExtent				(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
2317 	const tcu::ConstPixelBufferAccess	verificationAccess	(tcuFormat, iExtent, verificationBufferData);
2318 
2319 	generateReferenceLevel();
2320 	invalidateAlloc(vkd, device, verificationBufferAlloc);
2321 	if (!verifyResult(verificationAccess))
2322 		TCU_FAIL("Result does not match reference; check log for details");
2323 
2324 	return tcu::TestStatus::pass("Pass");
2325 }
2326 
2327 // Tests that use push constants in the new stages.
2328 class PushConstantCase : public MeshShaderMiscCase
2329 {
2330 public:
PushConstantCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)2331 					PushConstantCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
2332 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
2333 					{}
2334 
2335 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
2336 	TestInstance*	createInstance			(Context& context) const override;
2337 };
2338 
2339 class PushConstantInstance : public MeshShaderMiscInstance
2340 {
2341 public:
PushConstantInstance(Context & context,const MiscTestParams * params)2342 	PushConstantInstance (Context& context, const MiscTestParams* params)
2343 		: MeshShaderMiscInstance (context, params)
2344 	{}
2345 
2346 	void			generateReferenceLevel	() override;
2347 	tcu::TestStatus	iterate					() override;
2348 };
2349 
createInstance(Context & context) const2350 TestInstance* PushConstantCase::createInstance (Context& context) const
2351 {
2352 	return new PushConstantInstance(context, m_params.get());
2353 }
2354 
generateReferenceLevel()2355 void PushConstantInstance::generateReferenceLevel ()
2356 {
2357 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
2358 }
2359 
initPrograms(vk::SourceCollections & programCollection) const2360 void PushConstantCase::initPrograms (vk::SourceCollections& programCollection) const
2361 {
2362 	const auto useTaskShader	= m_params->needsTaskShader();
2363 	const auto pcNumFloats		= (useTaskShader ? 2u : 4u);
2364 
2365 	std::ostringstream pushConstantStream;
2366 	pushConstantStream
2367 		<< "layout (push_constant, std430) uniform PushConstantBlock {\n"
2368 		<< "    layout (offset=${PCOFFSET}) float values[" << pcNumFloats << "];\n"
2369 		<< "} pc;\n"
2370 		<< "\n"
2371 		;
2372 	const tcu::StringTemplate pushConstantsTemplate (pushConstantStream.str());
2373 	using TemplateMap = std::map<std::string, std::string>;
2374 
2375 	std::ostringstream taskDataStream;
2376 	taskDataStream
2377 		<< "taskNV TaskData {\n"
2378 		<< "    float values[2];\n"
2379 		<< "} td;\n"
2380 		<< "\n"
2381 		;
2382 	const auto taskDataDecl = taskDataStream.str();
2383 
2384 	if (useTaskShader)
2385 	{
2386 		TemplateMap taskMap;
2387 		taskMap["PCOFFSET"] = std::to_string(2u * sizeof(float));
2388 
2389 		std::ostringstream task;
2390 		task
2391 			<< "#version 450\n"
2392 			<< "#extension GL_NV_mesh_shader : enable\n"
2393 			<< "\n"
2394 			<< "layout(local_size_x=1) in;\n"
2395 			<< "\n"
2396 			<< "out " << taskDataDecl
2397 			<< pushConstantsTemplate.specialize(taskMap)
2398 			<< "void main ()\n"
2399 			<< "{\n"
2400 			<< "    gl_TaskCountNV = " << m_params->meshCount << ";\n"
2401 			<< "\n"
2402 			<< "    td.values[0] = pc.values[0];\n"
2403 			<< "    td.values[1] = pc.values[1];\n"
2404 			<< "}\n"
2405 			;
2406 		programCollection.glslSources.add("task") << glu::TaskSource(task.str());
2407 	}
2408 
2409 	{
2410 		const std::string blue	= (useTaskShader ? "td.values[0] + pc.values[0]" : "pc.values[0] + pc.values[2]");
2411 		const std::string alpha	= (useTaskShader ? "td.values[1] + pc.values[1]" : "pc.values[1] + pc.values[3]");
2412 
2413 		TemplateMap meshMap;
2414 		meshMap["PCOFFSET"] = "0";
2415 
2416 		std::ostringstream mesh;
2417 		mesh
2418 			<< "#version 450\n"
2419 			<< "#extension GL_NV_mesh_shader : enable\n"
2420 			<< "\n"
2421 			<< "layout(local_size_x=1) in;\n"
2422 			<< "layout(triangles) out;\n"
2423 			<< "layout(max_vertices=3, max_primitives=1) out;\n"
2424 			<< "\n"
2425 			<< "layout (location=0) out perprimitiveNV vec4 triangleColor[];\n"
2426 			<< "\n"
2427 			<< pushConstantsTemplate.specialize(meshMap)
2428 			<< (useTaskShader ? "in " + taskDataDecl : "")
2429 			<< "void main ()\n"
2430 			<< "{\n"
2431 			<< "    gl_PrimitiveCountNV = 1;\n"
2432 			<< "\n"
2433 			<< "    gl_MeshVerticesNV[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
2434 			<< "    gl_MeshVerticesNV[1].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
2435 			<< "    gl_MeshVerticesNV[2].gl_Position = vec4(-1.0,  3.0, 0.0, 1.0);\n"
2436 			<< "\n"
2437 			<< "    gl_PrimitiveIndicesNV[0] = 0;\n"
2438 			<< "    gl_PrimitiveIndicesNV[1] = 1;\n"
2439 			<< "    gl_PrimitiveIndicesNV[2] = 2;\n"
2440 			<< "\n"
2441 			<< "    triangleColor[0] = vec4(0.0, 0.0, " << blue << ", " << alpha << ");\n"
2442 			<< "}\n"
2443 			;
2444 		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
2445 	}
2446 
2447 	// Add default fragment shader.
2448 	MeshShaderMiscCase::initPrograms(programCollection);
2449 }
2450 
iterate()2451 tcu::TestStatus PushConstantInstance::iterate ()
2452 {
2453 	const auto&		vkd			= m_context.getDeviceInterface();
2454 	const auto		device		= m_context.getDevice();
2455 	auto&			alloc		= m_context.getDefaultAllocator();
2456 	const auto		queueIndex	= m_context.getUniversalQueueFamilyIndex();
2457 	const auto		queue		= m_context.getUniversalQueue();
2458 
2459 	const auto		imageFormat	= getOutputFormat();
2460 	const auto		tcuFormat	= mapVkFormat(imageFormat);
2461 	const auto		imageExtent	= makeExtent3D(m_params->width, m_params->height, 1u);
2462 	const auto		imageUsage	= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2463 
2464 	const auto&		binaries	= m_context.getBinaryCollection();
2465 	const auto		hasTask		= binaries.contains("task");
2466 
2467 	const VkImageCreateInfo colorBufferInfo =
2468 	{
2469 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
2470 		nullptr,								//	const void*				pNext;
2471 		0u,										//	VkImageCreateFlags		flags;
2472 		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
2473 		imageFormat,							//	VkFormat				format;
2474 		imageExtent,							//	VkExtent3D				extent;
2475 		1u,										//	uint32_t				mipLevels;
2476 		1u,										//	uint32_t				arrayLayers;
2477 		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
2478 		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
2479 		imageUsage,								//	VkImageUsageFlags		usage;
2480 		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
2481 		0u,										//	uint32_t				queueFamilyIndexCount;
2482 		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
2483 		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
2484 	};
2485 
2486 	// Create color image and view.
2487 	ImageWithMemory	colorImage	(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
2488 	const auto		colorSRR	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2489 	const auto		colorSRL	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
2490 	const auto		colorView	= makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
2491 
2492 	// Create a memory buffer for verification.
2493 	const auto			verificationBufferSize	= static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
2494 	const auto			verificationBufferUsage	= (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2495 	const auto			verificationBufferInfo	= makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
2496 
2497 	BufferWithMemory	verificationBuffer		(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
2498 	auto&				verificationBufferAlloc	= verificationBuffer.getAllocation();
2499 	void*				verificationBufferData	= verificationBufferAlloc.getHostPtr();
2500 
2501 	// Push constant ranges.
2502 	std::vector<float> pcData { 0.25f, 0.25f, 0.75f, 0.75f };
2503 	const auto pcSize		= static_cast<uint32_t>(de::dataSize(pcData));
2504 	const auto pcHalfSize	= pcSize / 2u;
2505 
2506 	std::vector<VkPushConstantRange> pcRanges;
2507 	if (hasTask)
2508 	{
2509 		pcRanges.push_back(makePushConstantRange(VK_SHADER_STAGE_MESH_BIT_NV, 0u, pcHalfSize));
2510 		pcRanges.push_back(makePushConstantRange(VK_SHADER_STAGE_TASK_BIT_NV, pcHalfSize, pcHalfSize));
2511 	}
2512 	else
2513 	{
2514 		pcRanges.push_back(makePushConstantRange(VK_SHADER_STAGE_MESH_BIT_NV, 0u, pcSize));
2515 	}
2516 
2517 	// Pipeline layout.
2518 	const auto pipelineLayout = makePipelineLayout(vkd, device, 0u, nullptr, static_cast<uint32_t>(pcRanges.size()), de::dataOrNull(pcRanges));
2519 
2520 	// Shader modules.
2521 	const auto	meshShader	= createShaderModule(vkd, device, binaries.get("mesh"));
2522 	const auto	fragShader	= createShaderModule(vkd, device, binaries.get("frag"));
2523 
2524 	Move<VkShaderModule> taskShader;
2525 	if (hasTask)
2526 		taskShader = createShaderModule(vkd, device, binaries.get("task"));
2527 
2528 	// Render pass.
2529 	const auto renderPass = makeRenderPass(vkd, device, imageFormat);
2530 
2531 	// Framebuffer.
2532 	const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);
2533 
2534 	// Viewport and scissor.
2535 	const std::vector<VkViewport>	viewports	(1u, makeViewport(imageExtent));
2536 	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(imageExtent));
2537 
2538 	const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
2539 		taskShader.get(), meshShader.get(), fragShader.get(),
2540 		renderPass.get(), viewports, scissors);
2541 
2542 	// Command pool and buffer.
2543 	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
2544 	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2545 	const auto cmdBuffer	= cmdBufferPtr.get();
2546 
2547 	beginCommandBuffer(vkd, cmdBuffer);
2548 
2549 	// Run pipeline.
2550 	const tcu::Vec4 clearColor (0.0f, 0.0f, 0.0f, 0.0f);
2551 	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
2552 	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
2553 	for (const auto& range : pcRanges)
2554 		vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), range.stageFlags, range.offset, range.size, reinterpret_cast<const char*>(pcData.data()) + range.offset);
2555 	vkd.cmdDrawMeshTasksNV(cmdBuffer, m_params->drawCount(), 0u);
2556 	endRenderPass(vkd, cmdBuffer);
2557 
2558 	// Copy color buffer to verification buffer.
2559 	const auto colorAccess		= (VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT);
2560 	const auto transferRead		= VK_ACCESS_TRANSFER_READ_BIT;
2561 	const auto transferWrite	= VK_ACCESS_TRANSFER_WRITE_BIT;
2562 	const auto hostRead			= VK_ACCESS_HOST_READ_BIT;
2563 
2564 	const auto preCopyBarrier	= makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
2565 	const auto postCopyBarrier	= makeMemoryBarrier(transferWrite, hostRead);
2566 	const auto copyRegion		= makeBufferImageCopy(imageExtent, colorSRL);
2567 
2568 	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
2569 	vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, verificationBuffer.get(), 1u, &copyRegion);
2570 	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postCopyBarrier, 0u, nullptr, 0u, nullptr);
2571 
2572 	endCommandBuffer(vkd, cmdBuffer);
2573 	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
2574 
2575 	// Generate reference image and compare results.
2576 	const tcu::IVec3					iExtent				(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
2577 	const tcu::ConstPixelBufferAccess	verificationAccess	(tcuFormat, iExtent, verificationBufferData);
2578 
2579 	generateReferenceLevel();
2580 	invalidateAlloc(vkd, device, verificationBufferAlloc);
2581 	if (!verifyResult(verificationAccess))
2582 		TCU_FAIL("Result does not match reference; check log for details");
2583 
2584 	return tcu::TestStatus::pass("Pass");
2585 }
2586 
2587 // Use large work group size, large number of vertices and large number of primitives.
2588 struct MaximizeThreadsParams : public MiscTestParams
2589 {
MaximizeThreadsParamsvkt::MeshShader::__anond9d5c6950111::MaximizeThreadsParams2590 	MaximizeThreadsParams	(const tcu::Maybe<uint32_t>& taskCount_, uint32_t meshCount_, uint32_t width_, uint32_t height_,
2591 							 uint32_t localSize_, uint32_t numVertices_, uint32_t numPrimitives_)
2592 		: MiscTestParams	(taskCount_, meshCount_, width_, height_)
2593 		, localSize			(localSize_)
2594 		, numVertices		(numVertices_)
2595 		, numPrimitives		(numPrimitives_)
2596 		{}
2597 
2598 	uint32_t localSize;
2599 	uint32_t numVertices;
2600 	uint32_t numPrimitives;
2601 
checkSupportvkt::MeshShader::__anond9d5c6950111::MaximizeThreadsParams2602 	void checkSupport (Context& context) const
2603 	{
2604 		const auto& properties = context.getMeshShaderProperties();
2605 
2606 		if (localSize > properties.maxMeshWorkGroupSize[0])
2607 			TCU_THROW(NotSupportedError, "Required local size not supported");
2608 
2609 		if (numVertices > properties.maxMeshOutputVertices)
2610 			TCU_THROW(NotSupportedError, "Required number of output vertices not supported");
2611 
2612 		if (numPrimitives > properties.maxMeshOutputPrimitives)
2613 			TCU_THROW(NotSupportedError, "Required number of output primitives not supported");
2614 	}
2615 };
2616 
2617 // Focus on the number of primitives.
2618 class MaximizePrimitivesCase : public MeshShaderMiscCase
2619 {
2620 public:
MaximizePrimitivesCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)2621 					MaximizePrimitivesCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
2622 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
2623 					{
2624 						const auto mtParams = dynamic_cast<MaximizeThreadsParams*>(m_params.get());
2625 						DE_ASSERT(mtParams);
2626 						DE_UNREF(mtParams); // For release builds.
2627 					}
2628 
2629 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
2630 	void			checkSupport			(Context& context) const override;
2631 	TestInstance*	createInstance			(Context& context) const override;
2632 };
2633 
2634 class MaximizePrimitivesInstance : public MeshShaderMiscInstance
2635 {
2636 public:
MaximizePrimitivesInstance(Context & context,const MiscTestParams * params)2637 	MaximizePrimitivesInstance (Context& context, const MiscTestParams* params)
2638 		: MeshShaderMiscInstance (context, params)
2639 	{}
2640 
2641 	void	generateReferenceLevel	() override;
2642 };
2643 
createInstance(Context & context) const2644 TestInstance* MaximizePrimitivesCase::createInstance (Context& context) const
2645 {
2646 	return new MaximizePrimitivesInstance (context, m_params.get());
2647 }
2648 
checkSupport(Context & context) const2649 void MaximizePrimitivesCase::checkSupport (Context& context) const
2650 {
2651 	MeshShaderMiscCase::checkSupport(context);
2652 
2653 	const auto params = dynamic_cast<MaximizeThreadsParams*>(m_params.get());
2654 	params->checkSupport(context);
2655 }
2656 
initPrograms(vk::SourceCollections & programCollection) const2657 void MaximizePrimitivesCase::initPrograms (vk::SourceCollections& programCollection) const
2658 {
2659 	const auto params = dynamic_cast<MaximizeThreadsParams*>(m_params.get());
2660 
2661 	DE_ASSERT(!params->needsTaskShader());
2662 	MeshShaderMiscCase::initPrograms(programCollection);
2663 
2664 	// Idea behind the test: generate 128 vertices, 1 per each pixel in a 128x1 image. Then, use each vertex to generate two points,
2665 	// adding the colors of each point using color blending to make sure every point is properly generated.
2666 
2667 	DE_ASSERT(params->numPrimitives == params->numVertices * 2u);
2668 	DE_ASSERT(params->numVertices == params->width);
2669 
2670 	const auto verticesPerInvocation	= params->numVertices / params->localSize;
2671 	const auto primitivesPerVertex		= params->numPrimitives / params->numVertices;
2672 
2673 	std::ostringstream mesh;
2674 	mesh
2675 		<< "#version 450\n"
2676 		<< "#extension GL_NV_mesh_shader : enable\n"
2677 		<< "\n"
2678 		<< "layout(local_size_x=" << params->localSize << ") in;\n"
2679 		<< "layout(points) out;\n"
2680 		<< "layout(max_vertices=" << params->numVertices << ", max_primitives=" << params->numPrimitives << ") out;\n"
2681 		<< "\n"
2682 		<< "layout (location=0) out perprimitiveNV vec4 pointColor[];\n"
2683 		<< "\n"
2684 		<< "const uint verticesPerInvocation = " << verticesPerInvocation << ";\n"
2685 		<< "const uint primitivesPerVertex   = " << primitivesPerVertex << ";\n"
2686 		<< "\n"
2687 		<< "vec4 colors[primitivesPerVertex] = vec4[](\n"
2688 		<< "    vec4(0.0, 0.0, 1.0, 1.0),\n"
2689 		<< "    vec4(1.0, 0.0, 0.0, 1.0)\n"
2690 		<< ");\n"
2691 		<< "void main ()\n"
2692 		<< "{\n"
2693 		<< "    gl_PrimitiveCountNV = " << params->numPrimitives << ";\n"
2694 		<< "    const uint firstVertex = gl_LocalInvocationIndex * verticesPerInvocation;\n"
2695 		<< "    for (uint i = 0u; i < verticesPerInvocation; ++i)\n"
2696 		<< "    {\n"
2697 		<< "        const uint vertexNumber = firstVertex + i;\n"
2698 		<< "        const float xCoord = ((float(vertexNumber) + 0.5) / " << params->width << ".0) * 2.0 - 1.0;\n"
2699 		<< "        const float yCoord = 0.0;\n"
2700 		<< "        gl_MeshVerticesNV[vertexNumber].gl_Position = vec4(xCoord, yCoord, 0.0f, 1.0f);\n"
2701 		<< "        gl_MeshVerticesNV[vertexNumber].gl_PointSize = 1.0f;\n"
2702 		<< "        for (uint j = 0u; j < primitivesPerVertex; ++j)\n"
2703 		<< "        {\n"
2704 		<< "            const uint primitiveNumber = vertexNumber * primitivesPerVertex + j;\n"
2705 		<< "            gl_PrimitiveIndicesNV[primitiveNumber] = vertexNumber;\n"
2706 		<< "            pointColor[primitiveNumber] = colors[j];\n"
2707 		<< "        }\n"
2708 		<< "    }\n"
2709 		<< "}\n"
2710 		;
2711 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
2712 }
2713 
generateReferenceLevel()2714 void MaximizePrimitivesInstance::generateReferenceLevel ()
2715 {
2716 	generateSolidRefLevel(tcu::Vec4(1.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
2717 }
2718 
2719 // Focus on the number of vertices.
2720 class MaximizeVerticesCase : public MeshShaderMiscCase
2721 {
2722 public:
MaximizeVerticesCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)2723 					MaximizeVerticesCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
2724 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
2725 					{
2726 						const auto mtParams = dynamic_cast<MaximizeThreadsParams*>(m_params.get());
2727 						DE_ASSERT(mtParams);
2728 						DE_UNREF(mtParams); // For release builds.
2729 					}
2730 
2731 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
2732 	void			checkSupport			(Context& context) const override;
2733 	TestInstance*	createInstance			(Context& context) const override;
2734 };
2735 
2736 class MaximizeVerticesInstance : public MeshShaderMiscInstance
2737 {
2738 public:
MaximizeVerticesInstance(Context & context,const MiscTestParams * params)2739 	MaximizeVerticesInstance (Context& context, const MiscTestParams* params)
2740 		: MeshShaderMiscInstance (context, params)
2741 	{}
2742 
2743 	void	generateReferenceLevel	() override;
2744 };
2745 
createInstance(Context & context) const2746 TestInstance* MaximizeVerticesCase::createInstance (Context& context) const
2747 {
2748 	return new MaximizeVerticesInstance (context, m_params.get());
2749 }
2750 
checkSupport(Context & context) const2751 void MaximizeVerticesCase::checkSupport (Context& context) const
2752 {
2753 	MeshShaderMiscCase::checkSupport(context);
2754 
2755 	const auto params = dynamic_cast<MaximizeThreadsParams*>(m_params.get());
2756 	params->checkSupport(context);
2757 }
2758 
initPrograms(vk::SourceCollections & programCollection) const2759 void MaximizeVerticesCase::initPrograms (vk::SourceCollections& programCollection) const
2760 {
2761 	const auto params = dynamic_cast<MaximizeThreadsParams*>(m_params.get());
2762 
2763 	DE_ASSERT(!params->needsTaskShader());
2764 	MeshShaderMiscCase::initPrograms(programCollection);
2765 
2766 	// Idea behind the test: cover a framebuffer using a triangle quad per pixel (4 vertices, 2 triangles).
2767 	DE_ASSERT(params->numVertices == params->numPrimitives * 2u);
2768 	DE_ASSERT(params->numPrimitives == params->width * 2u);
2769 
2770 	const auto pixelsPerInvocation		= params->width / params->localSize;
2771 	const auto verticesPerPixel			= 4u;
2772 	const auto primitivesPerPixel		= 2u;
2773 	const auto verticesPerInvocation	= pixelsPerInvocation * verticesPerPixel;
2774 	const auto primitivesPerInvocation	= pixelsPerInvocation * primitivesPerPixel;
2775 
2776 	std::ostringstream mesh;
2777 	mesh
2778 		<< "#version 450\n"
2779 		<< "#extension GL_NV_mesh_shader : enable\n"
2780 		<< "\n"
2781 		<< "layout(local_size_x=" << params->localSize << ") in;\n"
2782 		<< "layout(triangles) out;\n"
2783 		<< "layout(max_vertices=" << params->numVertices << ", max_primitives=" << params->numPrimitives << ") out;\n"
2784 		<< "\n"
2785 		<< "layout (location=0) out perprimitiveNV vec4 triangleColor[];\n"
2786 		<< "\n"
2787 		<< "const uint pixelsPerInvocation     = " << pixelsPerInvocation << ";\n"
2788 		<< "const uint verticesPerInvocation   = " << verticesPerInvocation << ";\n"
2789 		<< "const uint primitivesPerInvocation = " << primitivesPerInvocation << ";\n"
2790 		<< "const uint indicesPerInvocation    = primitivesPerInvocation * 3u;\n"
2791 		<< "const uint verticesPerPixel        = " << verticesPerPixel << ";\n"
2792 		<< "const uint primitivesPerPixel      = " << primitivesPerPixel << ";\n"
2793 		<< "const uint indicesPerPixel         = primitivesPerPixel * 3u;\n"
2794 		<< "\n"
2795 		<< "void main ()\n"
2796 		<< "{\n"
2797 		<< "    gl_PrimitiveCountNV = " << params->numPrimitives << ";\n"
2798 		<< "\n"
2799 		<< "    const uint firstPixel    = gl_LocalInvocationIndex * pixelsPerInvocation;\n"
2800 		<< "    const float pixelWidth   = 2.0 / float(" << params->width << ");\n"
2801 		<< "    const float quarterWidth = pixelWidth / 4.0;\n"
2802 		<< "\n"
2803 		<< "    for (uint pixelIdx = 0u; pixelIdx < pixelsPerInvocation; ++pixelIdx)\n"
2804 		<< "    {\n"
2805 		<< "        const uint pixelId      = firstPixel + pixelIdx;\n"
2806 		<< "        const float pixelCenter = (float(pixelId) + 0.5) / float(" << params->width << ") * 2.0 - 1.0;\n"
2807 		<< "        const float left        = pixelCenter - quarterWidth;\n"
2808 		<< "        const float right       = pixelCenter + quarterWidth;\n"
2809 		<< "\n"
2810 		<< "        const uint firstVertex = gl_LocalInvocationIndex * verticesPerInvocation + pixelIdx * verticesPerPixel;\n"
2811 		<< "        gl_MeshVerticesNV[firstVertex + 0].gl_Position = vec4(left,  -1.0, 0.0f, 1.0f);\n"
2812 		<< "        gl_MeshVerticesNV[firstVertex + 1].gl_Position = vec4(left,   1.0, 0.0f, 1.0f);\n"
2813 		<< "        gl_MeshVerticesNV[firstVertex + 2].gl_Position = vec4(right, -1.0, 0.0f, 1.0f);\n"
2814 		<< "        gl_MeshVerticesNV[firstVertex + 3].gl_Position = vec4(right,  1.0, 0.0f, 1.0f);\n"
2815 		<< "\n"
2816 		<< "        const uint firstPrimitive = gl_LocalInvocationIndex * primitivesPerInvocation + pixelIdx * primitivesPerPixel;\n"
2817 		<< "        triangleColor[firstPrimitive + 0] = vec4(0.0, 0.0, 1.0, 1.0);\n"
2818 		<< "        triangleColor[firstPrimitive + 1] = vec4(0.0, 0.0, 1.0, 1.0);\n"
2819 		<< "\n"
2820 		<< "        const uint firstIndex = gl_LocalInvocationIndex * indicesPerInvocation + pixelIdx * indicesPerPixel;\n"
2821 		<< "        gl_PrimitiveIndicesNV[firstIndex + 0] = firstVertex + 0;\n"
2822 		<< "        gl_PrimitiveIndicesNV[firstIndex + 1] = firstVertex + 1;\n"
2823 		<< "        gl_PrimitiveIndicesNV[firstIndex + 2] = firstVertex + 2;\n"
2824 		<< "        gl_PrimitiveIndicesNV[firstIndex + 3] = firstVertex + 1;\n"
2825 		<< "        gl_PrimitiveIndicesNV[firstIndex + 4] = firstVertex + 3;\n"
2826 		<< "        gl_PrimitiveIndicesNV[firstIndex + 5] = firstVertex + 2;\n"
2827 		<< "    }\n"
2828 		<< "}\n"
2829 		;
2830 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
2831 }
2832 
generateReferenceLevel()2833 void MaximizeVerticesInstance::generateReferenceLevel ()
2834 {
2835 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
2836 }
2837 
2838 // Focus on the number of invocations.
2839 class MaximizeInvocationsCase : public MeshShaderMiscCase
2840 {
2841 public:
MaximizeInvocationsCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)2842 					MaximizeInvocationsCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
2843 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
2844 					{
2845 						const auto mtParams = dynamic_cast<MaximizeThreadsParams*>(m_params.get());
2846 						DE_ASSERT(mtParams);
2847 						DE_UNREF(mtParams); // For release builds.
2848 					}
2849 
2850 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
2851 	void			checkSupport			(Context& context) const override;
2852 	TestInstance*	createInstance			(Context& context) const override;
2853 };
2854 
2855 class MaximizeInvocationsInstance : public MeshShaderMiscInstance
2856 {
2857 public:
MaximizeInvocationsInstance(Context & context,const MiscTestParams * params)2858 	MaximizeInvocationsInstance (Context& context, const MiscTestParams* params)
2859 		: MeshShaderMiscInstance (context, params)
2860 	{}
2861 
2862 	void	generateReferenceLevel	() override;
2863 };
2864 
createInstance(Context & context) const2865 TestInstance* MaximizeInvocationsCase::createInstance (Context& context) const
2866 {
2867 	return new MaximizeInvocationsInstance (context, m_params.get());
2868 }
2869 
checkSupport(Context & context) const2870 void MaximizeInvocationsCase::checkSupport (Context& context) const
2871 {
2872 	MeshShaderMiscCase::checkSupport(context);
2873 
2874 	const auto params = dynamic_cast<MaximizeThreadsParams*>(m_params.get());
2875 	params->checkSupport(context);
2876 }
2877 
initPrograms(vk::SourceCollections & programCollection) const2878 void MaximizeInvocationsCase::initPrograms (vk::SourceCollections& programCollection) const
2879 {
2880 	const auto params = dynamic_cast<MaximizeThreadsParams*>(m_params.get());
2881 
2882 	DE_ASSERT(!params->needsTaskShader());
2883 	MeshShaderMiscCase::initPrograms(programCollection);
2884 
2885 	// Idea behind the test: use two invocations to generate one point per framebuffer pixel.
2886 	DE_ASSERT(params->localSize == params->width * 2u);
2887 	DE_ASSERT(params->localSize == params->numPrimitives * 2u);
2888 	DE_ASSERT(params->localSize == params->numVertices * 2u);
2889 
2890 	std::ostringstream mesh;
2891 	mesh
2892 		<< "#version 450\n"
2893 		<< "#extension GL_NV_mesh_shader : enable\n"
2894 		<< "\n"
2895 		<< "layout(local_size_x=" << params->localSize << ") in;\n"
2896 		<< "layout(points) out;\n"
2897 		<< "layout(max_vertices=" << params->numVertices << ", max_primitives=" << params->numPrimitives << ") out;\n"
2898 		<< "\n"
2899 		<< "layout (location=0) out perprimitiveNV vec4 pointColor[];\n"
2900 		<< "\n"
2901 		<< "void main ()\n"
2902 		<< "{\n"
2903 		<< "    gl_PrimitiveCountNV = " << params->numPrimitives << ";\n"
2904 		<< "    const uint pixelId = gl_LocalInvocationIndex / 2u;\n"
2905 		<< "    if (gl_LocalInvocationIndex % 2u == 0u)\n"
2906 		<< "    {\n"
2907 		<< "        const float xCoord = (float(pixelId) + 0.5) / float(" << params->width << ") * 2.0 - 1.0;\n"
2908 		<< "        gl_MeshVerticesNV[pixelId].gl_Position = vec4(xCoord, 0.0, 0.0f, 1.0f);\n"
2909 		<< "        gl_MeshVerticesNV[pixelId].gl_PointSize = 1.0f;\n"
2910 		<< "    }\n"
2911 		<< "    else\n"
2912 		<< "    {\n"
2913 		<< "        gl_PrimitiveIndicesNV[pixelId] = pixelId;\n"
2914 		<< "        pointColor[pixelId] = vec4(0.0, 0.0, 1.0, 1.0);\n"
2915 		<< "    }\n"
2916 		<< "}\n"
2917 		;
2918 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
2919 }
2920 
generateReferenceLevel()2921 void MaximizeInvocationsInstance::generateReferenceLevel ()
2922 {
2923 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
2924 }
2925 
2926 // Tests checking varied interfaces between task, mesh and frag.
2927 
// Whether an interface variable is declared per vertex or per primitive.
enum class Owner
{
	VERTEX		= 0,
	PRIMITIVE	= 1,
};
2933 
// Basic numeric category of an interface variable.
enum class DataType
{
	INTEGER	= 0,
	FLOAT	= 1,
};
2939 
// Width in bits of each component of an interface variable. The enumerator value is the bit count
// itself, so it can be cast to an integer and printed directly.
// Note: 8-bit variables not available for Input/Output.
enum class BitWidth
{
	B16 = 16,
	B32 = 32,
	B64 = 64,
};
2947 
// Number of components of an interface variable. The enumerator value is the component count.
enum class DataDim
{
	SCALAR	= 1,
	VEC2	= 2,
	VEC3	= 3,
	VEC4	= 4,
};
2955 
// Interpolation mode of an interface variable: regular interpolation or the "flat" qualifier.
enum class Interpolation
{
	NORMAL	= 0,
	FLAT	= 1,
};
2961 
// Direction of an interface variable declaration in a shader: "in" or "out".
enum class Direction
{
	IN	= 0,
	OUT	= 1,
};
2967 
2968 // Interface variable.
2969 struct IfaceVar
2970 {
2971 	static constexpr uint32_t kNumVertices		= 4u;
2972 	static constexpr uint32_t kNumPrimitives	= 2u;
2973 	static constexpr uint32_t kVarsPerType		= 2u;
2974 
IfaceVarvkt::MeshShader::__anond9d5c6950111::IfaceVar2975 	IfaceVar (Owner owner_, DataType dataType_, BitWidth bitWidth_, DataDim dataDim_, Interpolation interpolation_, uint32_t index_)
2976 		: owner			(owner_)
2977 		, dataType		(dataType_)
2978 		, bitWidth		(bitWidth_)
2979 		, dataDim		(dataDim_)
2980 		, interpolation	(interpolation_)
2981 		, index			(index_)
2982 		{
2983 			DE_ASSERT(!(dataType == DataType::INTEGER && interpolation == Interpolation::NORMAL));
2984 			DE_ASSERT(!(owner == Owner::PRIMITIVE && interpolation == Interpolation::NORMAL));
2985 			DE_ASSERT(!(dataType == DataType::FLOAT && bitWidth == BitWidth::B64 && interpolation == Interpolation::NORMAL));
2986 			DE_ASSERT(index < kVarsPerType);
2987 		}
2988 
2989 	// This constructor needs to be defined for the code to compile, but it should never be actually called.
2990 	// To make sure it's not used, the index is defined to be very large, which should trigger the assertion in getName() below.
IfaceVarvkt::MeshShader::__anond9d5c6950111::IfaceVar2991 	IfaceVar ()
2992 		: owner			(Owner::VERTEX)
2993 		, dataType		(DataType::FLOAT)
2994 		, bitWidth		(BitWidth::B32)
2995 		, dataDim		(DataDim::VEC4)
2996 		, interpolation	(Interpolation::NORMAL)
2997 		, index			(std::numeric_limits<uint32_t>::max())
2998 		{
2999 		}
3000 
3001 	Owner			owner;
3002 	DataType		dataType;
3003 	BitWidth		bitWidth;
3004 	DataDim			dataDim;
3005 	Interpolation	interpolation;
3006 	uint32_t		index; // In case there are several variables matching this type.
3007 
3008 	// The variable name will be unique and depend on its type.
getNamevkt::MeshShader::__anond9d5c6950111::IfaceVar3009 	std::string getName () const
3010 	{
3011 		DE_ASSERT(index < kVarsPerType);
3012 
3013 		std::ostringstream name;
3014 		name
3015 			<< ((owner == Owner::VERTEX) ? "vert" : "prim") << "_"
3016 			<< ((dataType == DataType::INTEGER) ? "i" : "f") << static_cast<int>(bitWidth)
3017 			<< "d" << static_cast<int>(dataDim) << "_"
3018 			<< ((interpolation == Interpolation::NORMAL) ? "inter" : "flat") << "_"
3019 			<< index
3020 			;
3021 		return name.str();
3022 	}
3023 
3024 	// Get location size according to the type.
getLocationSizevkt::MeshShader::__anond9d5c6950111::IfaceVar3025 	uint32_t getLocationSize () const
3026 	{
3027 		return ((bitWidth == BitWidth::B64 && dataDim >= DataDim::VEC3) ? 2u : 1u);
3028 	}
3029 
3030 	// Get the variable type in GLSL.
getGLSLTypevkt::MeshShader::__anond9d5c6950111::IfaceVar3031 	std::string getGLSLType () const
3032 	{
3033 		const auto widthStr		= std::to_string(static_cast<int>(bitWidth));
3034 		const auto dimStr		= std::to_string(static_cast<int>(dataDim));
3035 		const auto shortTypeStr	= ((dataType == DataType::INTEGER) ? "i" : "f");
3036 		const auto typeStr		= ((dataType == DataType::INTEGER) ? "int" : "float");
3037 
3038 		if (dataDim == DataDim::SCALAR)
3039 			return typeStr + widthStr + "_t";				// e.g. int32_t or float16_t
3040 		return shortTypeStr + widthStr + "vec" + dimStr;	// e.g. i16vec2 or f64vec4.
3041 	}
3042 
3043 	// Get a simple declaration of type and name. This can be reused for several things.
getTypeAndNamevkt::MeshShader::__anond9d5c6950111::IfaceVar3044 	std::string getTypeAndName () const
3045 	{
3046 		return getGLSLType() + " " + getName();
3047 	}
3048 
getTypeAndNameDeclvkt::MeshShader::__anond9d5c6950111::IfaceVar3049 	std::string getTypeAndNameDecl (bool arrayDecl = false) const
3050 	{
3051 		std::ostringstream decl;
3052 		decl << "    " << getTypeAndName();
3053 		if (arrayDecl)
3054 			decl << "[" << ((owner == Owner::PRIMITIVE) ? IfaceVar::kNumPrimitives : IfaceVar::kNumVertices) << "]";
3055 		decl << ";\n";
3056 		return decl.str();
3057 	}
3058 
3059 	// Variable declaration statement given its location and direction.
getLocationDeclvkt::MeshShader::__anond9d5c6950111::IfaceVar3060 	std::string getLocationDecl (size_t location, Direction direction) const
3061 	{
3062 		std::ostringstream decl;
3063 		decl
3064 			<< "layout (location=" << location << ") "
3065 			<< ((direction == Direction::IN) ? "in" : "out") << " "
3066 			<< ((owner == Owner::PRIMITIVE) ? "perprimitiveNV " : "")
3067 			<< ((interpolation == Interpolation::FLAT) ? "flat " : "")
3068 			<< getTypeAndName()
3069 			<< ((direction == Direction::OUT) ? "[]" : "") << ";\n"
3070 			;
3071 		return decl.str();
3072 	}
3073 
3074 	// Get the name of the source data for this variable. Tests will use a storage buffer for the per-vertex data and a uniform
3075 	// buffer for the per-primitive data. The names in those will match.
getDataSourceNamevkt::MeshShader::__anond9d5c6950111::IfaceVar3076 	std::string getDataSourceName () const
3077 	{
3078 		// per-primitive data or per-vertex data buffers.
3079 		return ((owner == Owner::PRIMITIVE) ? "ppd" : "pvd") + ("." + getName());
3080 	}
3081 
3082 	// Get the boolean check variable name (see below).
getCheckNamevkt::MeshShader::__anond9d5c6950111::IfaceVar3083 	std::string getCheckName () const
3084 	{
3085 		return "good_" + getName();
3086 	}
3087 
3088 	// Get the check statement that would be used in the fragment shader.
getCheckStatementvkt::MeshShader::__anond9d5c6950111::IfaceVar3089 	std::string getCheckStatement () const
3090 	{
3091 		std::ostringstream	check;
3092 		const auto			sourceName	= getDataSourceName();
3093 		const auto			glslType	= getGLSLType();
3094 		const auto			name		= getName();
3095 
3096 		check << "    bool " << getCheckName() << " = ";
3097 		if (owner == Owner::VERTEX)
3098 		{
3099 			// There will be 4 values in the buffers.
3100 			std::ostringstream maxElem;
3101 			std::ostringstream minElem;
3102 
3103 			maxElem << glslType << "(max(max(max(" << sourceName << "[0], " << sourceName << "[1]), " << sourceName  << "[2]), " << sourceName << "[3]))";
3104 			minElem << glslType << "(min(min(min(" << sourceName << "[0], " << sourceName << "[1]), " << sourceName  << "[2]), " << sourceName << "[3]))";
3105 
3106 			if (dataDim == DataDim::SCALAR)
3107 			{
3108 				check << "(" << name << " <= " << maxElem.str() << ") && (" << name << " >= " << minElem.str() << ")";
3109 			}
3110 			else
3111 			{
3112 				check << "all(lessThanEqual(" << name << ", " << maxElem.str() << ")) && "
3113 				      << "all(greaterThanEqual(" << name << ", " << minElem.str() << "))";
3114 			}
3115 		}
3116 		else if (owner == Owner::PRIMITIVE)
3117 		{
3118 			// There will be 2 values in the buffers.
3119 			check << "((gl_PrimitiveID == 0 || gl_PrimitiveID == 1) && ("
3120 			      << "(gl_PrimitiveID == 0 && " << name << " == " << sourceName << "[0]) || "
3121 				  << "(gl_PrimitiveID == 1 && " << name << " == " << sourceName << "[1])))";
3122 		}
3123 		check << ";\n";
3124 
3125 		return check.str();
3126 	}
3127 
3128 	// Get an assignment statement for an out variable.
getAssignmentStatementvkt::MeshShader::__anond9d5c6950111::IfaceVar3129 	std::string getAssignmentStatement (size_t arrayIndex, const std::string& leftPrefix, const std::string& rightPrefix) const
3130 	{
3131 		const auto			name	= getName();
3132 		const auto			typeStr	= getGLSLType();
3133 		std::ostringstream	stmt;
3134 
3135 		stmt << "    " << leftPrefix << (leftPrefix.empty() ? "" : ".") << name << "[" << arrayIndex << "] = " << typeStr << "(" << rightPrefix << (rightPrefix.empty() ? "" : ".") << name << "[" << arrayIndex << "]);\n";
3136 		return stmt.str();
3137 	}
3138 
3139 	// Get the corresponding array size based on the owner (vertex or primitive)
getArraySizevkt::MeshShader::__anond9d5c6950111::IfaceVar3140 	uint32_t getArraySize () const
3141 	{
3142 		return ((owner == Owner::PRIMITIVE) ? IfaceVar::kNumPrimitives : IfaceVar::kNumVertices);
3143 	}
3144 
3145 };
3146 
	// List of interface variables, and an owning pointer to such a list. The pointer form is what
	// InterfaceVariableParams stores; the list is moved into it on construction.
	using IfaceVarVec		= std::vector<IfaceVar>;
	using IfaceVarVecPtr	= std::unique_ptr<IfaceVarVec>;
3149 
3150 struct InterfaceVariableParams : public MiscTestParams
3151 {
InterfaceVariableParamsvkt::MeshShader::__anond9d5c6950111::InterfaceVariableParams3152 	InterfaceVariableParams (const tcu::Maybe<uint32_t>& taskCount_, uint32_t meshCount_, uint32_t width_, uint32_t height_,
3153 							 bool useInt64_, bool useFloat64_, bool useInt16_, bool useFloat16_, IfaceVarVecPtr vars_)
3154 		: MiscTestParams	(taskCount_, meshCount_, width_, height_)
3155 		, useInt64			(useInt64_)
3156 		, useFloat64		(useFloat64_)
3157 		, useInt16			(useInt16_)
3158 		, useFloat16		(useFloat16_)
3159 		, ifaceVars			(std::move(vars_))
3160 	{}
3161 
3162 	// These need to match the list of interface variables.
3163 	bool			useInt64;
3164 	bool			useFloat64;
3165 	bool			useInt16;
3166 	bool			useFloat16;
3167 
3168 	IfaceVarVecPtr	ifaceVars;
3169 };
3170 
3171 class InterfaceVariablesCase : public MeshShaderMiscCase
3172 {
3173 public:
InterfaceVariablesCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)3174 					InterfaceVariablesCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
3175 						: MeshShaderMiscCase(testCtx, name, description, std::move(params))
3176 						{
3177 
3178 						}
~InterfaceVariablesCase(void)3179 	virtual			~InterfaceVariablesCase		(void) {}
3180 
3181 	TestInstance*	createInstance				(Context& context) const override;
3182 	void			checkSupport				(Context& context) const override;
3183 	void			initPrograms				(vk::SourceCollections& programCollection) const override;
3184 
3185 	// Note data types in the input buffers are always plain floats or ints. They will be converted to the appropriate type when
3186 	// copying them in or out of output variables. Note we have two variables per type, as per IfaceVar::kVarsPerType.
3187 
3188 	struct PerVertexData
3189 	{
3190 		// Interpolated floats.
3191 
3192 		tcu::Vec4	vert_f64d4_inter_0[IfaceVar::kNumVertices];
3193 		tcu::Vec4	vert_f64d4_inter_1[IfaceVar::kNumVertices];
3194 
3195 		tcu::Vec3	vert_f64d3_inter_0[IfaceVar::kNumVertices];
3196 		tcu::Vec3	vert_f64d3_inter_1[IfaceVar::kNumVertices];
3197 
3198 		tcu::Vec2	vert_f64d2_inter_0[IfaceVar::kNumVertices];
3199 		tcu::Vec2	vert_f64d2_inter_1[IfaceVar::kNumVertices];
3200 
3201 		float		vert_f64d1_inter_0[IfaceVar::kNumVertices];
3202 		float		vert_f64d1_inter_1[IfaceVar::kNumVertices];
3203 
3204 		tcu::Vec4	vert_f32d4_inter_0[IfaceVar::kNumVertices];
3205 		tcu::Vec4	vert_f32d4_inter_1[IfaceVar::kNumVertices];
3206 
3207 		tcu::Vec3	vert_f32d3_inter_0[IfaceVar::kNumVertices];
3208 		tcu::Vec3	vert_f32d3_inter_1[IfaceVar::kNumVertices];
3209 
3210 		tcu::Vec2	vert_f32d2_inter_0[IfaceVar::kNumVertices];
3211 		tcu::Vec2	vert_f32d2_inter_1[IfaceVar::kNumVertices];
3212 
3213 		float		vert_f32d1_inter_0[IfaceVar::kNumVertices];
3214 		float		vert_f32d1_inter_1[IfaceVar::kNumVertices];
3215 
3216 		tcu::Vec4	vert_f16d4_inter_0[IfaceVar::kNumVertices];
3217 		tcu::Vec4	vert_f16d4_inter_1[IfaceVar::kNumVertices];
3218 
3219 		tcu::Vec3	vert_f16d3_inter_0[IfaceVar::kNumVertices];
3220 		tcu::Vec3	vert_f16d3_inter_1[IfaceVar::kNumVertices];
3221 
3222 		tcu::Vec2	vert_f16d2_inter_0[IfaceVar::kNumVertices];
3223 		tcu::Vec2	vert_f16d2_inter_1[IfaceVar::kNumVertices];
3224 
3225 		float		vert_f16d1_inter_0[IfaceVar::kNumVertices];
3226 		float		vert_f16d1_inter_1[IfaceVar::kNumVertices];
3227 
3228 		// Flat floats.
3229 
3230 		tcu::Vec4	vert_f64d4_flat_0[IfaceVar::kNumVertices];
3231 		tcu::Vec4	vert_f64d4_flat_1[IfaceVar::kNumVertices];
3232 
3233 		tcu::Vec3	vert_f64d3_flat_0[IfaceVar::kNumVertices];
3234 		tcu::Vec3	vert_f64d3_flat_1[IfaceVar::kNumVertices];
3235 
3236 		tcu::Vec2	vert_f64d2_flat_0[IfaceVar::kNumVertices];
3237 		tcu::Vec2	vert_f64d2_flat_1[IfaceVar::kNumVertices];
3238 
3239 		float		vert_f64d1_flat_0[IfaceVar::kNumVertices];
3240 		float		vert_f64d1_flat_1[IfaceVar::kNumVertices];
3241 
3242 		tcu::Vec4	vert_f32d4_flat_0[IfaceVar::kNumVertices];
3243 		tcu::Vec4	vert_f32d4_flat_1[IfaceVar::kNumVertices];
3244 
3245 		tcu::Vec3	vert_f32d3_flat_0[IfaceVar::kNumVertices];
3246 		tcu::Vec3	vert_f32d3_flat_1[IfaceVar::kNumVertices];
3247 
3248 		tcu::Vec2	vert_f32d2_flat_0[IfaceVar::kNumVertices];
3249 		tcu::Vec2	vert_f32d2_flat_1[IfaceVar::kNumVertices];
3250 
3251 		float		vert_f32d1_flat_0[IfaceVar::kNumVertices];
3252 		float		vert_f32d1_flat_1[IfaceVar::kNumVertices];
3253 
3254 		tcu::Vec4	vert_f16d4_flat_0[IfaceVar::kNumVertices];
3255 		tcu::Vec4	vert_f16d4_flat_1[IfaceVar::kNumVertices];
3256 
3257 		tcu::Vec3	vert_f16d3_flat_0[IfaceVar::kNumVertices];
3258 		tcu::Vec3	vert_f16d3_flat_1[IfaceVar::kNumVertices];
3259 
3260 		tcu::Vec2	vert_f16d2_flat_0[IfaceVar::kNumVertices];
3261 		tcu::Vec2	vert_f16d2_flat_1[IfaceVar::kNumVertices];
3262 
3263 		float		vert_f16d1_flat_0[IfaceVar::kNumVertices];
3264 		float		vert_f16d1_flat_1[IfaceVar::kNumVertices];
3265 
3266 		// Flat ints.
3267 
3268 		tcu::IVec4	vert_i64d4_flat_0[IfaceVar::kNumVertices];
3269 		tcu::IVec4	vert_i64d4_flat_1[IfaceVar::kNumVertices];
3270 
3271 		tcu::IVec3	vert_i64d3_flat_0[IfaceVar::kNumVertices];
3272 		tcu::IVec3	vert_i64d3_flat_1[IfaceVar::kNumVertices];
3273 
3274 		tcu::IVec2	vert_i64d2_flat_0[IfaceVar::kNumVertices];
3275 		tcu::IVec2	vert_i64d2_flat_1[IfaceVar::kNumVertices];
3276 
3277 		int32_t		vert_i64d1_flat_0[IfaceVar::kNumVertices];
3278 		int32_t		vert_i64d1_flat_1[IfaceVar::kNumVertices];
3279 
3280 		tcu::IVec4	vert_i32d4_flat_0[IfaceVar::kNumVertices];
3281 		tcu::IVec4	vert_i32d4_flat_1[IfaceVar::kNumVertices];
3282 
3283 		tcu::IVec3	vert_i32d3_flat_0[IfaceVar::kNumVertices];
3284 		tcu::IVec3	vert_i32d3_flat_1[IfaceVar::kNumVertices];
3285 
3286 		tcu::IVec2	vert_i32d2_flat_0[IfaceVar::kNumVertices];
3287 		tcu::IVec2	vert_i32d2_flat_1[IfaceVar::kNumVertices];
3288 
3289 		int32_t		vert_i32d1_flat_0[IfaceVar::kNumVertices];
3290 		int32_t		vert_i32d1_flat_1[IfaceVar::kNumVertices];
3291 
3292 		tcu::IVec4	vert_i16d4_flat_0[IfaceVar::kNumVertices];
3293 		tcu::IVec4	vert_i16d4_flat_1[IfaceVar::kNumVertices];
3294 
3295 		tcu::IVec3	vert_i16d3_flat_0[IfaceVar::kNumVertices];
3296 		tcu::IVec3	vert_i16d3_flat_1[IfaceVar::kNumVertices];
3297 
3298 		tcu::IVec2	vert_i16d2_flat_0[IfaceVar::kNumVertices];
3299 		tcu::IVec2	vert_i16d2_flat_1[IfaceVar::kNumVertices];
3300 
3301 		int32_t		vert_i16d1_flat_0[IfaceVar::kNumVertices];
3302 		int32_t		vert_i16d1_flat_1[IfaceVar::kNumVertices];
3303 
3304 	};
3305 
3306 	struct PerPrimitiveData
3307 	{
3308 		// Flat floats.
3309 
3310 		tcu::Vec4	prim_f64d4_flat_0[IfaceVar::kNumPrimitives];
3311 		tcu::Vec4	prim_f64d4_flat_1[IfaceVar::kNumPrimitives];
3312 
3313 		tcu::Vec3	prim_f64d3_flat_0[IfaceVar::kNumPrimitives];
3314 		tcu::Vec3	prim_f64d3_flat_1[IfaceVar::kNumPrimitives];
3315 
3316 		tcu::Vec2	prim_f64d2_flat_0[IfaceVar::kNumPrimitives];
3317 		tcu::Vec2	prim_f64d2_flat_1[IfaceVar::kNumPrimitives];
3318 
3319 		float		prim_f64d1_flat_0[IfaceVar::kNumPrimitives];
3320 		float		prim_f64d1_flat_1[IfaceVar::kNumPrimitives];
3321 
3322 		tcu::Vec4	prim_f32d4_flat_0[IfaceVar::kNumPrimitives];
3323 		tcu::Vec4	prim_f32d4_flat_1[IfaceVar::kNumPrimitives];
3324 
3325 		tcu::Vec3	prim_f32d3_flat_0[IfaceVar::kNumPrimitives];
3326 		tcu::Vec3	prim_f32d3_flat_1[IfaceVar::kNumPrimitives];
3327 
3328 		tcu::Vec2	prim_f32d2_flat_0[IfaceVar::kNumPrimitives];
3329 		tcu::Vec2	prim_f32d2_flat_1[IfaceVar::kNumPrimitives];
3330 
3331 		float		prim_f32d1_flat_0[IfaceVar::kNumPrimitives];
3332 		float		prim_f32d1_flat_1[IfaceVar::kNumPrimitives];
3333 
3334 		tcu::Vec4	prim_f16d4_flat_0[IfaceVar::kNumPrimitives];
3335 		tcu::Vec4	prim_f16d4_flat_1[IfaceVar::kNumPrimitives];
3336 
3337 		tcu::Vec3	prim_f16d3_flat_0[IfaceVar::kNumPrimitives];
3338 		tcu::Vec3	prim_f16d3_flat_1[IfaceVar::kNumPrimitives];
3339 
3340 		tcu::Vec2	prim_f16d2_flat_0[IfaceVar::kNumPrimitives];
3341 		tcu::Vec2	prim_f16d2_flat_1[IfaceVar::kNumPrimitives];
3342 
3343 		float		prim_f16d1_flat_0[IfaceVar::kNumPrimitives];
3344 		float		prim_f16d1_flat_1[IfaceVar::kNumPrimitives];
3345 
3346 		// Flat ints.
3347 
3348 		tcu::IVec4	prim_i64d4_flat_0[IfaceVar::kNumPrimitives];
3349 		tcu::IVec4	prim_i64d4_flat_1[IfaceVar::kNumPrimitives];
3350 
3351 		tcu::IVec3	prim_i64d3_flat_0[IfaceVar::kNumPrimitives];
3352 		tcu::IVec3	prim_i64d3_flat_1[IfaceVar::kNumPrimitives];
3353 
3354 		tcu::IVec2	prim_i64d2_flat_0[IfaceVar::kNumPrimitives];
3355 		tcu::IVec2	prim_i64d2_flat_1[IfaceVar::kNumPrimitives];
3356 
3357 		int32_t		prim_i64d1_flat_0[IfaceVar::kNumPrimitives];
3358 		int32_t		prim_i64d1_flat_1[IfaceVar::kNumPrimitives];
3359 
3360 		tcu::IVec4	prim_i32d4_flat_0[IfaceVar::kNumPrimitives];
3361 		tcu::IVec4	prim_i32d4_flat_1[IfaceVar::kNumPrimitives];
3362 
3363 		tcu::IVec3	prim_i32d3_flat_0[IfaceVar::kNumPrimitives];
3364 		tcu::IVec3	prim_i32d3_flat_1[IfaceVar::kNumPrimitives];
3365 
3366 		tcu::IVec2	prim_i32d2_flat_0[IfaceVar::kNumPrimitives];
3367 		tcu::IVec2	prim_i32d2_flat_1[IfaceVar::kNumPrimitives];
3368 
3369 		int32_t		prim_i32d1_flat_0[IfaceVar::kNumPrimitives];
3370 		int32_t		prim_i32d1_flat_1[IfaceVar::kNumPrimitives];
3371 
3372 		tcu::IVec4	prim_i16d4_flat_0[IfaceVar::kNumPrimitives];
3373 		tcu::IVec4	prim_i16d4_flat_1[IfaceVar::kNumPrimitives];
3374 
3375 		tcu::IVec3	prim_i16d3_flat_0[IfaceVar::kNumPrimitives];
3376 		tcu::IVec3	prim_i16d3_flat_1[IfaceVar::kNumPrimitives];
3377 
3378 		tcu::IVec2	prim_i16d2_flat_0[IfaceVar::kNumPrimitives];
3379 		tcu::IVec2	prim_i16d2_flat_1[IfaceVar::kNumPrimitives];
3380 
3381 		int32_t		prim_i16d1_flat_0[IfaceVar::kNumPrimitives];
3382 		int32_t		prim_i16d1_flat_1[IfaceVar::kNumPrimitives];
3383 
3384 	};
3385 
3386 	static constexpr uint32_t kGlslangBuiltInCount	= 11u;
3387 	static constexpr uint32_t kMaxLocations			= 16u;
3388 };
3389 
3390 class InterfaceVariablesInstance : public MeshShaderMiscInstance
3391 {
3392 public:
InterfaceVariablesInstance(Context & context,const MiscTestParams * params)3393 						InterfaceVariablesInstance	(Context& context, const MiscTestParams* params)
3394 							: MeshShaderMiscInstance(context, params) {}
~InterfaceVariablesInstance(void)3395 	virtual				~InterfaceVariablesInstance	(void) {}
3396 
3397 	void				generateReferenceLevel		() override;
3398 	tcu::TestStatus		iterate						(void) override;
3399 };
3400 
createInstance(Context & context) const3401 TestInstance* InterfaceVariablesCase::createInstance (Context& context) const
3402 {
3403 	return new InterfaceVariablesInstance(context, m_params.get());
3404 }
3405 
checkSupport(Context & context) const3406 void InterfaceVariablesCase::checkSupport (Context& context) const
3407 {
3408 	const auto params = dynamic_cast<InterfaceVariableParams*>(m_params.get());
3409 	DE_ASSERT(params);
3410 
3411 	MeshShaderMiscCase::checkSupport(context);
3412 
3413 	if (params->useFloat64)
3414 		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_FLOAT64);
3415 
3416 	if (params->useInt64)
3417 		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_INT64);
3418 
3419 	if (params->useInt16)
3420 		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_INT16);
3421 
3422 	if (params->useFloat16)
3423 	{
3424 		const auto& features = context.getShaderFloat16Int8Features();
3425 		if (!features.shaderFloat16)
3426 			TCU_THROW(NotSupportedError, "shaderFloat16 feature not supported");
3427 	}
3428 
3429 	if (params->useInt16 || params->useFloat16)
3430 	{
3431 		const auto& features = context.get16BitStorageFeatures();
3432 		if (!features.storageInputOutput16)
3433 			TCU_THROW(NotSupportedError, "storageInputOutput16 feature not supported");
3434 	}
3435 
3436 	// glslang will use several built-ins in the generated mesh code, which count against the location and component limits.
3437 	{
3438 		const auto	neededComponents	= (kGlslangBuiltInCount + kMaxLocations) * 4u;
3439 		const auto&	properties			= context.getDeviceProperties();
3440 
3441 		if (neededComponents > properties.limits.maxFragmentInputComponents)
3442 			TCU_THROW(NotSupportedError, "maxFragmentInputComponents too low to run this test");
3443 	}
3444 }
3445 
initPrograms(vk::SourceCollections & programCollection) const3446 void InterfaceVariablesCase::initPrograms (vk::SourceCollections& programCollection) const
3447 {
3448 	// Bindings needs to match the PerVertexData and PerPrimitiveData structures.
3449 	std::ostringstream bindings;
3450 	bindings
3451 		<< "layout(set=0, binding=0, std430) readonly buffer PerVertexBlock {\n"
3452 		<< "    vec4   vert_f64d4_inter_0[" << IfaceVar::kNumVertices << "];\n"
3453 		<< "    vec4   vert_f64d4_inter_1[" << IfaceVar::kNumVertices << "];\n"
3454 		<< "    vec3   vert_f64d3_inter_0[" << IfaceVar::kNumVertices << "];\n"
3455 		<< "    vec3   vert_f64d3_inter_1[" << IfaceVar::kNumVertices << "];\n"
3456 		<< "    vec2   vert_f64d2_inter_0[" << IfaceVar::kNumVertices << "];\n"
3457 		<< "    vec2   vert_f64d2_inter_1[" << IfaceVar::kNumVertices << "];\n"
3458 		<< "    float  vert_f64d1_inter_0[" << IfaceVar::kNumVertices << "];\n"
3459 		<< "    float  vert_f64d1_inter_1[" << IfaceVar::kNumVertices << "];\n"
3460 		<< "    vec4   vert_f32d4_inter_0[" << IfaceVar::kNumVertices << "];\n"
3461 		<< "    vec4   vert_f32d4_inter_1[" << IfaceVar::kNumVertices << "];\n"
3462 		<< "    vec3   vert_f32d3_inter_0[" << IfaceVar::kNumVertices << "];\n"
3463 		<< "    vec3   vert_f32d3_inter_1[" << IfaceVar::kNumVertices << "];\n"
3464 		<< "    vec2   vert_f32d2_inter_0[" << IfaceVar::kNumVertices << "];\n"
3465 		<< "    vec2   vert_f32d2_inter_1[" << IfaceVar::kNumVertices << "];\n"
3466 		<< "    float  vert_f32d1_inter_0[" << IfaceVar::kNumVertices << "];\n"
3467 		<< "    float  vert_f32d1_inter_1[" << IfaceVar::kNumVertices << "];\n"
3468 		<< "    vec4   vert_f16d4_inter_0[" << IfaceVar::kNumVertices << "];\n"
3469 		<< "    vec4   vert_f16d4_inter_1[" << IfaceVar::kNumVertices << "];\n"
3470 		<< "    vec3   vert_f16d3_inter_0[" << IfaceVar::kNumVertices << "];\n"
3471 		<< "    vec3   vert_f16d3_inter_1[" << IfaceVar::kNumVertices << "];\n"
3472 		<< "    vec2   vert_f16d2_inter_0[" << IfaceVar::kNumVertices << "];\n"
3473 		<< "    vec2   vert_f16d2_inter_1[" << IfaceVar::kNumVertices << "];\n"
3474 		<< "    float  vert_f16d1_inter_0[" << IfaceVar::kNumVertices << "];\n"
3475 		<< "    float  vert_f16d1_inter_1[" << IfaceVar::kNumVertices << "];\n"
3476 		<< "    vec4   vert_f64d4_flat_0[" << IfaceVar::kNumVertices << "];\n"
3477 		<< "    vec4   vert_f64d4_flat_1[" << IfaceVar::kNumVertices << "];\n"
3478 		<< "    vec3   vert_f64d3_flat_0[" << IfaceVar::kNumVertices << "];\n"
3479 		<< "    vec3   vert_f64d3_flat_1[" << IfaceVar::kNumVertices << "];\n"
3480 		<< "    vec2   vert_f64d2_flat_0[" << IfaceVar::kNumVertices << "];\n"
3481 		<< "    vec2   vert_f64d2_flat_1[" << IfaceVar::kNumVertices << "];\n"
3482 		<< "    float  vert_f64d1_flat_0[" << IfaceVar::kNumVertices << "];\n"
3483 		<< "    float  vert_f64d1_flat_1[" << IfaceVar::kNumVertices << "];\n"
3484 		<< "    vec4   vert_f32d4_flat_0[" << IfaceVar::kNumVertices << "];\n"
3485 		<< "    vec4   vert_f32d4_flat_1[" << IfaceVar::kNumVertices << "];\n"
3486 		<< "    vec3   vert_f32d3_flat_0[" << IfaceVar::kNumVertices << "];\n"
3487 		<< "    vec3   vert_f32d3_flat_1[" << IfaceVar::kNumVertices << "];\n"
3488 		<< "    vec2   vert_f32d2_flat_0[" << IfaceVar::kNumVertices << "];\n"
3489 		<< "    vec2   vert_f32d2_flat_1[" << IfaceVar::kNumVertices << "];\n"
3490 		<< "    float  vert_f32d1_flat_0[" << IfaceVar::kNumVertices << "];\n"
3491 		<< "    float  vert_f32d1_flat_1[" << IfaceVar::kNumVertices << "];\n"
3492 		<< "    vec4   vert_f16d4_flat_0[" << IfaceVar::kNumVertices << "];\n"
3493 		<< "    vec4   vert_f16d4_flat_1[" << IfaceVar::kNumVertices << "];\n"
3494 		<< "    vec3   vert_f16d3_flat_0[" << IfaceVar::kNumVertices << "];\n"
3495 		<< "    vec3   vert_f16d3_flat_1[" << IfaceVar::kNumVertices << "];\n"
3496 		<< "    vec2   vert_f16d2_flat_0[" << IfaceVar::kNumVertices << "];\n"
3497 		<< "    vec2   vert_f16d2_flat_1[" << IfaceVar::kNumVertices << "];\n"
3498 		<< "    float  vert_f16d1_flat_0[" << IfaceVar::kNumVertices << "];\n"
3499 		<< "    float  vert_f16d1_flat_1[" << IfaceVar::kNumVertices << "];\n"
3500 		<< "    ivec4  vert_i64d4_flat_0[" << IfaceVar::kNumVertices << "];\n"
3501 		<< "    ivec4  vert_i64d4_flat_1[" << IfaceVar::kNumVertices << "];\n"
3502 		<< "    ivec3  vert_i64d3_flat_0[" << IfaceVar::kNumVertices << "];\n"
3503 		<< "    ivec3  vert_i64d3_flat_1[" << IfaceVar::kNumVertices << "];\n"
3504 		<< "    ivec2  vert_i64d2_flat_0[" << IfaceVar::kNumVertices << "];\n"
3505 		<< "    ivec2  vert_i64d2_flat_1[" << IfaceVar::kNumVertices << "];\n"
3506 		<< "    int    vert_i64d1_flat_0[" << IfaceVar::kNumVertices << "];\n"
3507 		<< "    int    vert_i64d1_flat_1[" << IfaceVar::kNumVertices << "];\n"
3508 		<< "    ivec4  vert_i32d4_flat_0[" << IfaceVar::kNumVertices << "];\n"
3509 		<< "    ivec4  vert_i32d4_flat_1[" << IfaceVar::kNumVertices << "];\n"
3510 		<< "    ivec3  vert_i32d3_flat_0[" << IfaceVar::kNumVertices << "];\n"
3511 		<< "    ivec3  vert_i32d3_flat_1[" << IfaceVar::kNumVertices << "];\n"
3512 		<< "    ivec2  vert_i32d2_flat_0[" << IfaceVar::kNumVertices << "];\n"
3513 		<< "    ivec2  vert_i32d2_flat_1[" << IfaceVar::kNumVertices << "];\n"
3514 		<< "    int    vert_i32d1_flat_0[" << IfaceVar::kNumVertices << "];\n"
3515 		<< "    int    vert_i32d1_flat_1[" << IfaceVar::kNumVertices << "];\n"
3516 		<< "    ivec4  vert_i16d4_flat_0[" << IfaceVar::kNumVertices << "];\n"
3517 		<< "    ivec4  vert_i16d4_flat_1[" << IfaceVar::kNumVertices << "];\n"
3518 		<< "    ivec3  vert_i16d3_flat_0[" << IfaceVar::kNumVertices << "];\n"
3519 		<< "    ivec3  vert_i16d3_flat_1[" << IfaceVar::kNumVertices << "];\n"
3520 		<< "    ivec2  vert_i16d2_flat_0[" << IfaceVar::kNumVertices << "];\n"
3521 		<< "    ivec2  vert_i16d2_flat_1[" << IfaceVar::kNumVertices << "];\n"
3522 		<< "    int    vert_i16d1_flat_0[" << IfaceVar::kNumVertices << "];\n"
3523 		<< "    int    vert_i16d1_flat_1[" << IfaceVar::kNumVertices << "];\n"
3524 		<< " } pvd;\n"
3525 		<< "\n"
3526 		<< "layout(set=0, binding=1, std430) readonly buffer PerPrimitiveBlock {\n"
3527 		<< "    vec4   prim_f64d4_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3528 		<< "    vec4   prim_f64d4_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3529 		<< "    vec3   prim_f64d3_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3530 		<< "    vec3   prim_f64d3_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3531 		<< "    vec2   prim_f64d2_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3532 		<< "    vec2   prim_f64d2_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3533 		<< "    float  prim_f64d1_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3534 		<< "    float  prim_f64d1_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3535 		<< "    vec4   prim_f32d4_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3536 		<< "    vec4   prim_f32d4_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3537 		<< "    vec3   prim_f32d3_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3538 		<< "    vec3   prim_f32d3_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3539 		<< "    vec2   prim_f32d2_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3540 		<< "    vec2   prim_f32d2_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3541 		<< "    float  prim_f32d1_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3542 		<< "    float  prim_f32d1_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3543 		<< "    vec4   prim_f16d4_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3544 		<< "    vec4   prim_f16d4_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3545 		<< "    vec3   prim_f16d3_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3546 		<< "    vec3   prim_f16d3_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3547 		<< "    vec2   prim_f16d2_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3548 		<< "    vec2   prim_f16d2_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3549 		<< "    float  prim_f16d1_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3550 		<< "    float  prim_f16d1_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3551 		<< "    ivec4  prim_i64d4_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3552 		<< "    ivec4  prim_i64d4_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3553 		<< "    ivec3  prim_i64d3_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3554 		<< "    ivec3  prim_i64d3_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3555 		<< "    ivec2  prim_i64d2_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3556 		<< "    ivec2  prim_i64d2_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3557 		<< "    int    prim_i64d1_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3558 		<< "    int    prim_i64d1_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3559 		<< "    ivec4  prim_i32d4_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3560 		<< "    ivec4  prim_i32d4_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3561 		<< "    ivec3  prim_i32d3_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3562 		<< "    ivec3  prim_i32d3_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3563 		<< "    ivec2  prim_i32d2_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3564 		<< "    ivec2  prim_i32d2_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3565 		<< "    int    prim_i32d1_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3566 		<< "    int    prim_i32d1_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3567 		<< "    ivec4  prim_i16d4_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3568 		<< "    ivec4  prim_i16d4_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3569 		<< "    ivec3  prim_i16d3_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3570 		<< "    ivec3  prim_i16d3_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3571 		<< "    ivec2  prim_i16d2_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3572 		<< "    ivec2  prim_i16d2_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3573 		<< "    int    prim_i16d1_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3574 		<< "    int    prim_i16d1_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3575 		<< " } ppd;\n"
3576 		<< "\n"
3577 		;
3578 	const auto bindingsDecl = bindings.str();
3579 
3580 	const auto	params	= dynamic_cast<InterfaceVariableParams*>(m_params.get());
3581 	DE_ASSERT(params);
3582 	const auto&	varVec	= *(params->ifaceVars);
3583 
3584 	std::ostringstream frag;
3585 	frag
3586 		<< "#version 450\n"
3587 		<< "#extension GL_NV_mesh_shader : enable\n"
3588 		<< "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n"
3589 		<< "\n"
3590 		<< bindingsDecl
3591 		;
3592 
3593 	// Declare interface variables as Input in the fragment shader.
3594 	{
3595 		uint32_t usedLocations = 0u;
3596 		for (const auto& var : varVec)
3597 		{
3598 			frag << var.getLocationDecl(usedLocations, Direction::IN);
3599 			usedLocations += var.getLocationSize();
3600 		}
3601 	}
3602 
3603 	frag
3604 		<< "\n"
3605 		<< "layout (location=0) out vec4 outColor;\n"
3606 		<< "\n"
3607 		<< "void main ()\n"
3608 		<< "{\n"
3609 		;
3610 
3611 	// Emit checks for each variable value in the fragment shader.
3612 	std::ostringstream allConditions;
3613 
3614 	for (size_t i = 0; i < varVec.size(); ++i)
3615 	{
3616 		frag << varVec[i].getCheckStatement();
3617 		allConditions << ((i == 0) ? "" : " && ") << varVec[i].getCheckName();
3618 	}
3619 
3620 	// Emit final check.
3621 	frag
3622 		<< "    if (" << allConditions.str() << ") {\n"
3623 		<< "        outColor = vec4(0.0, 0.0, 1.0, 1.0);\n"
3624 		<< "    } else {\n"
3625 		<< "        outColor = vec4(0.0, 0.0, 0.0, 1.0);\n"
3626 		<< "    }\n"
3627 		<< "}\n"
3628 		;
3629 	programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
3630 
3631 	std::ostringstream pvdDataDeclStream;
3632 	pvdDataDeclStream
3633 		<< "    vec4 positions[4];\n"
3634 		<< "    float pointSizes[4];\n"
3635 		<< "    float clipDistances[4];\n"
3636 		<< "    vec4 custom1[4];\n"
3637 		<< "    float custom2[4];\n"
3638 		<< "    int custom3[4];\n"
3639 		;
3640 	const auto pvdDataDecl = pvdDataDeclStream.str();
3641 
3642 	std::ostringstream ppdDataDeclStream;
3643 	ppdDataDeclStream
3644 		<< "    int primitiveIds[2];\n"
3645 		<< "    int viewportIndices[2];\n"
3646 		<< "    uvec4 custom4[2];\n"
3647 		<< "    float custom5[2];\n"
3648 		;
3649 	const auto ppdDataDecl = ppdDataDeclStream.str();
3650 
3651 	std::ostringstream taskDataStream;
3652 	taskDataStream << "taskNV TaskData {\n";
3653 	for (size_t i = 0; i < varVec.size(); ++i)
3654 		taskDataStream << varVec[i].getTypeAndNameDecl(/*arrayDecl*/true);
3655 	taskDataStream << "} td;\n\n";
3656 
3657 	const auto taskShader		= m_params->needsTaskShader();
3658 	const auto taskDataDecl		= taskDataStream.str();
3659 	const auto meshPvdPrefix	= (taskShader ? "td" : "pvd");
3660 	const auto meshPpdPrefix	= (taskShader ? "td" : "ppd");
3661 
3662 	std::ostringstream mesh;
3663 	mesh
3664 		<< "#version 450\n"
3665 		<< "#extension GL_NV_mesh_shader : enable\n"
3666 		<< "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n"
3667 		<< "\n"
3668 		<< "layout (local_size_x=1) in;\n"
3669 		<< "layout (max_primitives=" << IfaceVar::kNumPrimitives << ", max_vertices=" << IfaceVar::kNumVertices << ") out;\n"
3670 		<< "layout (triangles) out;\n"
3671 		<< "\n"
3672 		;
3673 
3674 	// Declare interface variables as Output variables.
3675 	{
3676 		uint32_t usedLocations = 0u;
3677 		for (const auto& var : varVec)
3678 		{
3679 			mesh << var.getLocationDecl(usedLocations, Direction::OUT);
3680 			usedLocations += var.getLocationSize();
3681 		}
3682 	}
3683 
3684 	mesh
3685 		<< "out gl_MeshPerVertexNV {\n"
3686 		<< "   vec4  gl_Position;\n"
3687 		<< "} gl_MeshVerticesNV[];\n"
3688 		<< "out perprimitiveNV gl_MeshPerPrimitiveNV {\n"
3689 		<< "  int gl_PrimitiveID;\n"
3690 		<< "} gl_MeshPrimitivesNV[];\n"
3691 		<< "\n"
3692 		<< (taskShader ? "in " + taskDataDecl : bindingsDecl)
3693 		<< "vec4 positions[" << IfaceVar::kNumVertices << "] = vec4[](\n"
3694 		<< "    vec4(-1.0, -1.0, 0.0, 1.0),\n"
3695 		<< "    vec4( 1.0, -1.0, 0.0, 1.0),\n"
3696 		<< "    vec4(-1.0,  1.0, 0.0, 1.0),\n"
3697 		<< "    vec4( 1.0,  1.0, 0.0, 1.0)\n"
3698 		<< ");\n"
3699 		<< "\n"
3700 		<< "int indices[" << (IfaceVar::kNumPrimitives * 3u) << "] = int[](\n"
3701 		<< "    0, 1, 2, 2, 3, 1\n"
3702 		<< ");\n"
3703 		<< "\n"
3704 		<< "void main ()\n"
3705 		<< "{\n"
3706 		<< "    gl_PrimitiveCountNV = " << IfaceVar::kNumPrimitives << ";\n"
3707 		<< "\n"
3708 		;
3709 
3710 	// Emit positions, indices and primitive IDs.
3711 	for (uint32_t i = 0; i < IfaceVar::kNumVertices; ++i)
3712 		mesh << "    gl_MeshVerticesNV[" << i << "].gl_Position = positions[" << i << "];\n";
3713 	mesh << "\n";
3714 
3715 	for (uint32_t i = 0; i < IfaceVar::kNumPrimitives; ++i)
3716 	for (uint32_t j = 0; j < 3u; ++j) // 3 vertices per triangle
3717 	{
3718 		const auto arrayPos = i*3u + j;
3719 		mesh << "    gl_PrimitiveIndicesNV[" << arrayPos << "] = indices[" << arrayPos << "];\n";
3720 	}
3721 	mesh << "\n";
3722 
3723 	for (uint32_t i = 0; i < IfaceVar::kNumPrimitives; ++i)
3724 		mesh << "    gl_MeshPrimitivesNV[" << i << "].gl_PrimitiveID = " << i << ";\n";
3725 	mesh << "\n";
3726 
3727 	// Copy data to output variables, either from the task data or the bindings.
3728 	for (size_t i = 0; i < varVec.size(); ++i)
3729 	{
3730 		const auto arraySize	= varVec[i].getArraySize();
3731 		const auto prefix		= ((varVec[i].owner == Owner::VERTEX) ? meshPvdPrefix : meshPpdPrefix);
3732 		for (uint32_t arrayIndex = 0u; arrayIndex < arraySize; ++arrayIndex)
3733 			mesh << varVec[i].getAssignmentStatement(arrayIndex, "", prefix);
3734 	}
3735 
3736 	mesh
3737 		<< "\n"
3738 		<< "}\n"
3739 		;
3740 
3741 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
3742 
3743 	// Task shader if needed.
3744 	if (taskShader)
3745 	{
3746 		const auto& meshCount		= m_params->meshCount;
3747 		const auto	taskPvdPrefix	= "pvd";
3748 		const auto	taskPpdPrefix	= "ppd";
3749 
3750 		std::ostringstream task;
3751 		task
3752 			<< "#version 450\n"
3753 			<< "#extension GL_NV_mesh_shader : enable\n"
3754 			<< "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n"
3755 			<< "\n"
3756 			<< "out " << taskDataDecl
3757 			<< bindingsDecl
3758 			<< "void main ()\n"
3759 			<< "{\n"
3760 			<< "    gl_TaskCountNV = " << meshCount << ";\n"
3761 			<< "\n"
3762 			;
3763 
3764 		// Copy data from bindings to the task data structure.
3765 		for (size_t i = 0; i < varVec.size(); ++i)
3766 		{
3767 			const auto arraySize	= varVec[i].getArraySize();
3768 			const auto prefix		= ((varVec[i].owner == Owner::VERTEX) ? taskPvdPrefix : taskPpdPrefix);
3769 
3770 			for (uint32_t arrayIndex = 0u; arrayIndex < arraySize; ++arrayIndex)
3771 				task << varVec[i].getAssignmentStatement(arrayIndex, "td", prefix);
3772 		}
3773 
3774 		task << "}\n";
3775 		programCollection.glslSources.add("task") << glu::TaskSource(task.str());
3776 	}
3777 }
3778 
generateReferenceLevel()3779 void InterfaceVariablesInstance::generateReferenceLevel ()
3780 {
3781 	const auto format		= getOutputFormat();
3782 	const auto tcuFormat	= mapVkFormat(format);
3783 
3784 	const auto iWidth		= static_cast<int>(m_params->width);
3785 	const auto iHeight		= static_cast<int>(m_params->height);
3786 
3787 	m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
3788 
3789 	const auto access		= m_referenceLevel->getAccess();
3790 	const auto blueColor	= tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f);
3791 
3792 	tcu::clear(access, blueColor);
3793 }
3794 
iterate()3795 tcu::TestStatus InterfaceVariablesInstance::iterate ()
3796 {
3797 	const auto&		vkd			= m_context.getDeviceInterface();
3798 	const auto		device		= m_context.getDevice();
3799 	auto&			alloc		= m_context.getDefaultAllocator();
3800 	const auto		queueIndex	= m_context.getUniversalQueueFamilyIndex();
3801 	const auto		queue		= m_context.getUniversalQueue();
3802 
3803 	const auto		imageFormat	= getOutputFormat();
3804 	const auto		tcuFormat	= mapVkFormat(imageFormat);
3805 	const auto		imageExtent	= makeExtent3D(m_params->width, m_params->height, 1u);
3806 	const auto		imageUsage	= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3807 
3808 	const auto&		binaries	= m_context.getBinaryCollection();
3809 	const auto		hasTask		= binaries.contains("task");
3810 	const auto		bufStages	= (VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_MESH_BIT_NV | (hasTask ? VK_SHADER_STAGE_TASK_BIT_NV : 0));
3811 
3812 	const VkImageCreateInfo colorBufferInfo =
3813 	{
3814 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
3815 		nullptr,								//	const void*				pNext;
3816 		0u,										//	VkImageCreateFlags		flags;
3817 		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
3818 		imageFormat,							//	VkFormat				format;
3819 		imageExtent,							//	VkExtent3D				extent;
3820 		1u,										//	uint32_t				mipLevels;
3821 		1u,										//	uint32_t				arrayLayers;
3822 		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
3823 		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
3824 		imageUsage,								//	VkImageUsageFlags		usage;
3825 		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
3826 		0u,										//	uint32_t				queueFamilyIndexCount;
3827 		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
3828 		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
3829 	};
3830 
3831 	// Create color image and view.
3832 	ImageWithMemory	colorImage	(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
3833 	const auto		colorSRR	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
3834 	const auto		colorSRL	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
3835 	const auto		colorView	= makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
3836 
3837 	// Create a memory buffer for verification.
3838 	const auto			verificationBufferSize	= static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
3839 	const auto			verificationBufferUsage	= (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3840 	const auto			verificationBufferInfo	= makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
3841 
3842 	BufferWithMemory	verificationBuffer		(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
3843 	auto&				verificationBufferAlloc	= verificationBuffer.getAllocation();
3844 	void*				verificationBufferData	= verificationBufferAlloc.getHostPtr();
3845 
3846 	// Bindings data.
3847 	// The initialization statements below were generated automatically with a Python script.
3848 	// Note: it works with stdin/stdout.
3849 #if 0
3850 import re
3851 import sys
3852 
3853 # Lines look like: tcu::Vec4 vert_f64d4_inter_0[IfaceVar::kNumVertices];
3854 lineRE = re.compile(r'^\s*(\S+)\s+(\w+)\[(\S+)\];.*$')
3855 vecRE = re.compile(r'^.*Vec(\d)$')
3856 floatSuffixes = (
3857     (0.25, 0.50, 0.875, 0.0),
3858     (0.25, 0.75, 0.875, 0.0),
3859     (0.50, 0.50, 0.875, 0.0),
3860     (0.50, 0.75, 0.875, 0.0),
3861 )
3862 lineCounter = 0
3863 
3864 for line in sys.stdin:
3865     match = lineRE.search(line)
3866     if not match:
3867         continue
3868 
3869     varType = match.group(1)
3870     varName = match.group(2)
3871     varSize = match.group(3)
3872 
3873     arraySize = (4 if varSize == 'IfaceVar::kNumVertices' else 2)
3874     vecMatch = vecRE.match(varType)
3875     numComponents = (1 if not vecMatch else vecMatch.group(1))
3876     isFlat = '_flat_' in varName
3877 
3878     lineCounter += 1
3879     varBaseVal = 1000 + 10 * lineCounter
3880     valueTemplate = ('%s' if numComponents == 1 else '%s(%%s)' % (varType,))
3881 
3882     for index in range(arraySize):
3883         valueStr = ''
3884         for comp in range(numComponents):
3885             compValue = varBaseVal + comp + 1
3886             if not isFlat:
3887                 compValue += floatSuffixes[index][comp]
3888             valueStr += ('' if comp == 0 else ', ') + str(compValue)
3889         value = valueTemplate % (valueStr,)
3890         statement = '%s[%s] = %s;' % (varName, index, value)
3891         print('%s' % (statement,))
3892 #endif
3893 	InterfaceVariablesCase::PerVertexData perVertexData;
3894 	{
3895 		perVertexData.vert_f64d4_inter_0[0] = tcu::Vec4(1011.25, 1012.5, 1013.875, 1014.0);
3896 		perVertexData.vert_f64d4_inter_0[1] = tcu::Vec4(1011.25, 1012.75, 1013.875, 1014.0);
3897 		perVertexData.vert_f64d4_inter_0[2] = tcu::Vec4(1011.5, 1012.5, 1013.875, 1014.0);
3898 		perVertexData.vert_f64d4_inter_0[3] = tcu::Vec4(1011.5, 1012.75, 1013.875, 1014.0);
3899 		perVertexData.vert_f64d4_inter_1[0] = tcu::Vec4(1021.25, 1022.5, 1023.875, 1024.0);
3900 		perVertexData.vert_f64d4_inter_1[1] = tcu::Vec4(1021.25, 1022.75, 1023.875, 1024.0);
3901 		perVertexData.vert_f64d4_inter_1[2] = tcu::Vec4(1021.5, 1022.5, 1023.875, 1024.0);
3902 		perVertexData.vert_f64d4_inter_1[3] = tcu::Vec4(1021.5, 1022.75, 1023.875, 1024.0);
3903 		perVertexData.vert_f64d3_inter_0[0] = tcu::Vec3(1031.25, 1032.5, 1033.875);
3904 		perVertexData.vert_f64d3_inter_0[1] = tcu::Vec3(1031.25, 1032.75, 1033.875);
3905 		perVertexData.vert_f64d3_inter_0[2] = tcu::Vec3(1031.5, 1032.5, 1033.875);
3906 		perVertexData.vert_f64d3_inter_0[3] = tcu::Vec3(1031.5, 1032.75, 1033.875);
3907 		perVertexData.vert_f64d3_inter_1[0] = tcu::Vec3(1041.25, 1042.5, 1043.875);
3908 		perVertexData.vert_f64d3_inter_1[1] = tcu::Vec3(1041.25, 1042.75, 1043.875);
3909 		perVertexData.vert_f64d3_inter_1[2] = tcu::Vec3(1041.5, 1042.5, 1043.875);
3910 		perVertexData.vert_f64d3_inter_1[3] = tcu::Vec3(1041.5, 1042.75, 1043.875);
3911 		perVertexData.vert_f64d2_inter_0[0] = tcu::Vec2(1051.25, 1052.5);
3912 		perVertexData.vert_f64d2_inter_0[1] = tcu::Vec2(1051.25, 1052.75);
3913 		perVertexData.vert_f64d2_inter_0[2] = tcu::Vec2(1051.5, 1052.5);
3914 		perVertexData.vert_f64d2_inter_0[3] = tcu::Vec2(1051.5, 1052.75);
3915 		perVertexData.vert_f64d2_inter_1[0] = tcu::Vec2(1061.25, 1062.5);
3916 		perVertexData.vert_f64d2_inter_1[1] = tcu::Vec2(1061.25, 1062.75);
3917 		perVertexData.vert_f64d2_inter_1[2] = tcu::Vec2(1061.5, 1062.5);
3918 		perVertexData.vert_f64d2_inter_1[3] = tcu::Vec2(1061.5, 1062.75);
3919 		perVertexData.vert_f64d1_inter_0[0] = 1071.25;
3920 		perVertexData.vert_f64d1_inter_0[1] = 1071.25;
3921 		perVertexData.vert_f64d1_inter_0[2] = 1071.5;
3922 		perVertexData.vert_f64d1_inter_0[3] = 1071.5;
3923 		perVertexData.vert_f64d1_inter_1[0] = 1081.25;
3924 		perVertexData.vert_f64d1_inter_1[1] = 1081.25;
3925 		perVertexData.vert_f64d1_inter_1[2] = 1081.5;
3926 		perVertexData.vert_f64d1_inter_1[3] = 1081.5;
3927 		perVertexData.vert_f32d4_inter_0[0] = tcu::Vec4(1091.25, 1092.5, 1093.875, 1094.0);
3928 		perVertexData.vert_f32d4_inter_0[1] = tcu::Vec4(1091.25, 1092.75, 1093.875, 1094.0);
3929 		perVertexData.vert_f32d4_inter_0[2] = tcu::Vec4(1091.5, 1092.5, 1093.875, 1094.0);
3930 		perVertexData.vert_f32d4_inter_0[3] = tcu::Vec4(1091.5, 1092.75, 1093.875, 1094.0);
3931 		perVertexData.vert_f32d4_inter_1[0] = tcu::Vec4(1101.25, 1102.5, 1103.875, 1104.0);
3932 		perVertexData.vert_f32d4_inter_1[1] = tcu::Vec4(1101.25, 1102.75, 1103.875, 1104.0);
3933 		perVertexData.vert_f32d4_inter_1[2] = tcu::Vec4(1101.5, 1102.5, 1103.875, 1104.0);
3934 		perVertexData.vert_f32d4_inter_1[3] = tcu::Vec4(1101.5, 1102.75, 1103.875, 1104.0);
3935 		perVertexData.vert_f32d3_inter_0[0] = tcu::Vec3(1111.25, 1112.5, 1113.875);
3936 		perVertexData.vert_f32d3_inter_0[1] = tcu::Vec3(1111.25, 1112.75, 1113.875);
3937 		perVertexData.vert_f32d3_inter_0[2] = tcu::Vec3(1111.5, 1112.5, 1113.875);
3938 		perVertexData.vert_f32d3_inter_0[3] = tcu::Vec3(1111.5, 1112.75, 1113.875);
3939 		perVertexData.vert_f32d3_inter_1[0] = tcu::Vec3(1121.25, 1122.5, 1123.875);
3940 		perVertexData.vert_f32d3_inter_1[1] = tcu::Vec3(1121.25, 1122.75, 1123.875);
3941 		perVertexData.vert_f32d3_inter_1[2] = tcu::Vec3(1121.5, 1122.5, 1123.875);
3942 		perVertexData.vert_f32d3_inter_1[3] = tcu::Vec3(1121.5, 1122.75, 1123.875);
3943 		perVertexData.vert_f32d2_inter_0[0] = tcu::Vec2(1131.25, 1132.5);
3944 		perVertexData.vert_f32d2_inter_0[1] = tcu::Vec2(1131.25, 1132.75);
3945 		perVertexData.vert_f32d2_inter_0[2] = tcu::Vec2(1131.5, 1132.5);
3946 		perVertexData.vert_f32d2_inter_0[3] = tcu::Vec2(1131.5, 1132.75);
3947 		perVertexData.vert_f32d2_inter_1[0] = tcu::Vec2(1141.25, 1142.5);
3948 		perVertexData.vert_f32d2_inter_1[1] = tcu::Vec2(1141.25, 1142.75);
3949 		perVertexData.vert_f32d2_inter_1[2] = tcu::Vec2(1141.5, 1142.5);
3950 		perVertexData.vert_f32d2_inter_1[3] = tcu::Vec2(1141.5, 1142.75);
3951 		perVertexData.vert_f32d1_inter_0[0] = 1151.25;
3952 		perVertexData.vert_f32d1_inter_0[1] = 1151.25;
3953 		perVertexData.vert_f32d1_inter_0[2] = 1151.5;
3954 		perVertexData.vert_f32d1_inter_0[3] = 1151.5;
3955 		perVertexData.vert_f32d1_inter_1[0] = 1161.25;
3956 		perVertexData.vert_f32d1_inter_1[1] = 1161.25;
3957 		perVertexData.vert_f32d1_inter_1[2] = 1161.5;
3958 		perVertexData.vert_f32d1_inter_1[3] = 1161.5;
3959 		perVertexData.vert_f16d4_inter_0[0] = tcu::Vec4(1171.25, 1172.5, 1173.875, 1174.0);
3960 		perVertexData.vert_f16d4_inter_0[1] = tcu::Vec4(1171.25, 1172.75, 1173.875, 1174.0);
3961 		perVertexData.vert_f16d4_inter_0[2] = tcu::Vec4(1171.5, 1172.5, 1173.875, 1174.0);
3962 		perVertexData.vert_f16d4_inter_0[3] = tcu::Vec4(1171.5, 1172.75, 1173.875, 1174.0);
3963 		perVertexData.vert_f16d4_inter_1[0] = tcu::Vec4(1181.25, 1182.5, 1183.875, 1184.0);
3964 		perVertexData.vert_f16d4_inter_1[1] = tcu::Vec4(1181.25, 1182.75, 1183.875, 1184.0);
3965 		perVertexData.vert_f16d4_inter_1[2] = tcu::Vec4(1181.5, 1182.5, 1183.875, 1184.0);
3966 		perVertexData.vert_f16d4_inter_1[3] = tcu::Vec4(1181.5, 1182.75, 1183.875, 1184.0);
3967 		perVertexData.vert_f16d3_inter_0[0] = tcu::Vec3(1191.25, 1192.5, 1193.875);
3968 		perVertexData.vert_f16d3_inter_0[1] = tcu::Vec3(1191.25, 1192.75, 1193.875);
3969 		perVertexData.vert_f16d3_inter_0[2] = tcu::Vec3(1191.5, 1192.5, 1193.875);
3970 		perVertexData.vert_f16d3_inter_0[3] = tcu::Vec3(1191.5, 1192.75, 1193.875);
3971 		perVertexData.vert_f16d3_inter_1[0] = tcu::Vec3(1201.25, 1202.5, 1203.875);
3972 		perVertexData.vert_f16d3_inter_1[1] = tcu::Vec3(1201.25, 1202.75, 1203.875);
3973 		perVertexData.vert_f16d3_inter_1[2] = tcu::Vec3(1201.5, 1202.5, 1203.875);
3974 		perVertexData.vert_f16d3_inter_1[3] = tcu::Vec3(1201.5, 1202.75, 1203.875);
3975 		perVertexData.vert_f16d2_inter_0[0] = tcu::Vec2(1211.25, 1212.5);
3976 		perVertexData.vert_f16d2_inter_0[1] = tcu::Vec2(1211.25, 1212.75);
3977 		perVertexData.vert_f16d2_inter_0[2] = tcu::Vec2(1211.5, 1212.5);
3978 		perVertexData.vert_f16d2_inter_0[3] = tcu::Vec2(1211.5, 1212.75);
3979 		perVertexData.vert_f16d2_inter_1[0] = tcu::Vec2(1221.25, 1222.5);
3980 		perVertexData.vert_f16d2_inter_1[1] = tcu::Vec2(1221.25, 1222.75);
3981 		perVertexData.vert_f16d2_inter_1[2] = tcu::Vec2(1221.5, 1222.5);
3982 		perVertexData.vert_f16d2_inter_1[3] = tcu::Vec2(1221.5, 1222.75);
3983 		perVertexData.vert_f16d1_inter_0[0] = 1231.25;
3984 		perVertexData.vert_f16d1_inter_0[1] = 1231.25;
3985 		perVertexData.vert_f16d1_inter_0[2] = 1231.5;
3986 		perVertexData.vert_f16d1_inter_0[3] = 1231.5;
3987 		perVertexData.vert_f16d1_inter_1[0] = 1241.25;
3988 		perVertexData.vert_f16d1_inter_1[1] = 1241.25;
3989 		perVertexData.vert_f16d1_inter_1[2] = 1241.5;
3990 		perVertexData.vert_f16d1_inter_1[3] = 1241.5;
3991 		perVertexData.vert_f64d4_flat_0[0] = tcu::Vec4(1251, 1252, 1253, 1254);
3992 		perVertexData.vert_f64d4_flat_0[1] = tcu::Vec4(1251, 1252, 1253, 1254);
3993 		perVertexData.vert_f64d4_flat_0[2] = tcu::Vec4(1251, 1252, 1253, 1254);
3994 		perVertexData.vert_f64d4_flat_0[3] = tcu::Vec4(1251, 1252, 1253, 1254);
3995 		perVertexData.vert_f64d4_flat_1[0] = tcu::Vec4(1261, 1262, 1263, 1264);
3996 		perVertexData.vert_f64d4_flat_1[1] = tcu::Vec4(1261, 1262, 1263, 1264);
3997 		perVertexData.vert_f64d4_flat_1[2] = tcu::Vec4(1261, 1262, 1263, 1264);
3998 		perVertexData.vert_f64d4_flat_1[3] = tcu::Vec4(1261, 1262, 1263, 1264);
3999 		perVertexData.vert_f64d3_flat_0[0] = tcu::Vec3(1271, 1272, 1273);
4000 		perVertexData.vert_f64d3_flat_0[1] = tcu::Vec3(1271, 1272, 1273);
4001 		perVertexData.vert_f64d3_flat_0[2] = tcu::Vec3(1271, 1272, 1273);
4002 		perVertexData.vert_f64d3_flat_0[3] = tcu::Vec3(1271, 1272, 1273);
4003 		perVertexData.vert_f64d3_flat_1[0] = tcu::Vec3(1281, 1282, 1283);
4004 		perVertexData.vert_f64d3_flat_1[1] = tcu::Vec3(1281, 1282, 1283);
4005 		perVertexData.vert_f64d3_flat_1[2] = tcu::Vec3(1281, 1282, 1283);
4006 		perVertexData.vert_f64d3_flat_1[3] = tcu::Vec3(1281, 1282, 1283);
4007 		perVertexData.vert_f64d2_flat_0[0] = tcu::Vec2(1291, 1292);
4008 		perVertexData.vert_f64d2_flat_0[1] = tcu::Vec2(1291, 1292);
4009 		perVertexData.vert_f64d2_flat_0[2] = tcu::Vec2(1291, 1292);
4010 		perVertexData.vert_f64d2_flat_0[3] = tcu::Vec2(1291, 1292);
4011 		perVertexData.vert_f64d2_flat_1[0] = tcu::Vec2(1301, 1302);
4012 		perVertexData.vert_f64d2_flat_1[1] = tcu::Vec2(1301, 1302);
4013 		perVertexData.vert_f64d2_flat_1[2] = tcu::Vec2(1301, 1302);
4014 		perVertexData.vert_f64d2_flat_1[3] = tcu::Vec2(1301, 1302);
4015 		perVertexData.vert_f64d1_flat_0[0] = 1311;
4016 		perVertexData.vert_f64d1_flat_0[1] = 1311;
4017 		perVertexData.vert_f64d1_flat_0[2] = 1311;
4018 		perVertexData.vert_f64d1_flat_0[3] = 1311;
4019 		perVertexData.vert_f64d1_flat_1[0] = 1321;
4020 		perVertexData.vert_f64d1_flat_1[1] = 1321;
4021 		perVertexData.vert_f64d1_flat_1[2] = 1321;
4022 		perVertexData.vert_f64d1_flat_1[3] = 1321;
4023 		perVertexData.vert_f32d4_flat_0[0] = tcu::Vec4(1331, 1332, 1333, 1334);
4024 		perVertexData.vert_f32d4_flat_0[1] = tcu::Vec4(1331, 1332, 1333, 1334);
4025 		perVertexData.vert_f32d4_flat_0[2] = tcu::Vec4(1331, 1332, 1333, 1334);
4026 		perVertexData.vert_f32d4_flat_0[3] = tcu::Vec4(1331, 1332, 1333, 1334);
4027 		perVertexData.vert_f32d4_flat_1[0] = tcu::Vec4(1341, 1342, 1343, 1344);
4028 		perVertexData.vert_f32d4_flat_1[1] = tcu::Vec4(1341, 1342, 1343, 1344);
4029 		perVertexData.vert_f32d4_flat_1[2] = tcu::Vec4(1341, 1342, 1343, 1344);
4030 		perVertexData.vert_f32d4_flat_1[3] = tcu::Vec4(1341, 1342, 1343, 1344);
4031 		perVertexData.vert_f32d3_flat_0[0] = tcu::Vec3(1351, 1352, 1353);
4032 		perVertexData.vert_f32d3_flat_0[1] = tcu::Vec3(1351, 1352, 1353);
4033 		perVertexData.vert_f32d3_flat_0[2] = tcu::Vec3(1351, 1352, 1353);
4034 		perVertexData.vert_f32d3_flat_0[3] = tcu::Vec3(1351, 1352, 1353);
4035 		perVertexData.vert_f32d3_flat_1[0] = tcu::Vec3(1361, 1362, 1363);
4036 		perVertexData.vert_f32d3_flat_1[1] = tcu::Vec3(1361, 1362, 1363);
4037 		perVertexData.vert_f32d3_flat_1[2] = tcu::Vec3(1361, 1362, 1363);
4038 		perVertexData.vert_f32d3_flat_1[3] = tcu::Vec3(1361, 1362, 1363);
4039 		perVertexData.vert_f32d2_flat_0[0] = tcu::Vec2(1371, 1372);
4040 		perVertexData.vert_f32d2_flat_0[1] = tcu::Vec2(1371, 1372);
4041 		perVertexData.vert_f32d2_flat_0[2] = tcu::Vec2(1371, 1372);
4042 		perVertexData.vert_f32d2_flat_0[3] = tcu::Vec2(1371, 1372);
4043 		perVertexData.vert_f32d2_flat_1[0] = tcu::Vec2(1381, 1382);
4044 		perVertexData.vert_f32d2_flat_1[1] = tcu::Vec2(1381, 1382);
4045 		perVertexData.vert_f32d2_flat_1[2] = tcu::Vec2(1381, 1382);
4046 		perVertexData.vert_f32d2_flat_1[3] = tcu::Vec2(1381, 1382);
4047 		perVertexData.vert_f32d1_flat_0[0] = 1391;
4048 		perVertexData.vert_f32d1_flat_0[1] = 1391;
4049 		perVertexData.vert_f32d1_flat_0[2] = 1391;
4050 		perVertexData.vert_f32d1_flat_0[3] = 1391;
4051 		perVertexData.vert_f32d1_flat_1[0] = 1401;
4052 		perVertexData.vert_f32d1_flat_1[1] = 1401;
4053 		perVertexData.vert_f32d1_flat_1[2] = 1401;
4054 		perVertexData.vert_f32d1_flat_1[3] = 1401;
4055 		perVertexData.vert_f16d4_flat_0[0] = tcu::Vec4(1411, 1412, 1413, 1414);
4056 		perVertexData.vert_f16d4_flat_0[1] = tcu::Vec4(1411, 1412, 1413, 1414);
4057 		perVertexData.vert_f16d4_flat_0[2] = tcu::Vec4(1411, 1412, 1413, 1414);
4058 		perVertexData.vert_f16d4_flat_0[3] = tcu::Vec4(1411, 1412, 1413, 1414);
4059 		perVertexData.vert_f16d4_flat_1[0] = tcu::Vec4(1421, 1422, 1423, 1424);
4060 		perVertexData.vert_f16d4_flat_1[1] = tcu::Vec4(1421, 1422, 1423, 1424);
4061 		perVertexData.vert_f16d4_flat_1[2] = tcu::Vec4(1421, 1422, 1423, 1424);
4062 		perVertexData.vert_f16d4_flat_1[3] = tcu::Vec4(1421, 1422, 1423, 1424);
4063 		perVertexData.vert_f16d3_flat_0[0] = tcu::Vec3(1431, 1432, 1433);
4064 		perVertexData.vert_f16d3_flat_0[1] = tcu::Vec3(1431, 1432, 1433);
4065 		perVertexData.vert_f16d3_flat_0[2] = tcu::Vec3(1431, 1432, 1433);
4066 		perVertexData.vert_f16d3_flat_0[3] = tcu::Vec3(1431, 1432, 1433);
4067 		perVertexData.vert_f16d3_flat_1[0] = tcu::Vec3(1441, 1442, 1443);
4068 		perVertexData.vert_f16d3_flat_1[1] = tcu::Vec3(1441, 1442, 1443);
4069 		perVertexData.vert_f16d3_flat_1[2] = tcu::Vec3(1441, 1442, 1443);
4070 		perVertexData.vert_f16d3_flat_1[3] = tcu::Vec3(1441, 1442, 1443);
4071 		perVertexData.vert_f16d2_flat_0[0] = tcu::Vec2(1451, 1452);
4072 		perVertexData.vert_f16d2_flat_0[1] = tcu::Vec2(1451, 1452);
4073 		perVertexData.vert_f16d2_flat_0[2] = tcu::Vec2(1451, 1452);
4074 		perVertexData.vert_f16d2_flat_0[3] = tcu::Vec2(1451, 1452);
4075 		perVertexData.vert_f16d2_flat_1[0] = tcu::Vec2(1461, 1462);
4076 		perVertexData.vert_f16d2_flat_1[1] = tcu::Vec2(1461, 1462);
4077 		perVertexData.vert_f16d2_flat_1[2] = tcu::Vec2(1461, 1462);
4078 		perVertexData.vert_f16d2_flat_1[3] = tcu::Vec2(1461, 1462);
4079 		perVertexData.vert_f16d1_flat_0[0] = 1471;
4080 		perVertexData.vert_f16d1_flat_0[1] = 1471;
4081 		perVertexData.vert_f16d1_flat_0[2] = 1471;
4082 		perVertexData.vert_f16d1_flat_0[3] = 1471;
4083 		perVertexData.vert_f16d1_flat_1[0] = 1481;
4084 		perVertexData.vert_f16d1_flat_1[1] = 1481;
4085 		perVertexData.vert_f16d1_flat_1[2] = 1481;
4086 		perVertexData.vert_f16d1_flat_1[3] = 1481;
4087 		perVertexData.vert_i64d4_flat_0[0] = tcu::IVec4(1491, 1492, 1493, 1494);
4088 		perVertexData.vert_i64d4_flat_0[1] = tcu::IVec4(1491, 1492, 1493, 1494);
4089 		perVertexData.vert_i64d4_flat_0[2] = tcu::IVec4(1491, 1492, 1493, 1494);
4090 		perVertexData.vert_i64d4_flat_0[3] = tcu::IVec4(1491, 1492, 1493, 1494);
4091 		perVertexData.vert_i64d4_flat_1[0] = tcu::IVec4(1501, 1502, 1503, 1504);
4092 		perVertexData.vert_i64d4_flat_1[1] = tcu::IVec4(1501, 1502, 1503, 1504);
4093 		perVertexData.vert_i64d4_flat_1[2] = tcu::IVec4(1501, 1502, 1503, 1504);
4094 		perVertexData.vert_i64d4_flat_1[3] = tcu::IVec4(1501, 1502, 1503, 1504);
4095 		perVertexData.vert_i64d3_flat_0[0] = tcu::IVec3(1511, 1512, 1513);
4096 		perVertexData.vert_i64d3_flat_0[1] = tcu::IVec3(1511, 1512, 1513);
4097 		perVertexData.vert_i64d3_flat_0[2] = tcu::IVec3(1511, 1512, 1513);
4098 		perVertexData.vert_i64d3_flat_0[3] = tcu::IVec3(1511, 1512, 1513);
4099 		perVertexData.vert_i64d3_flat_1[0] = tcu::IVec3(1521, 1522, 1523);
4100 		perVertexData.vert_i64d3_flat_1[1] = tcu::IVec3(1521, 1522, 1523);
4101 		perVertexData.vert_i64d3_flat_1[2] = tcu::IVec3(1521, 1522, 1523);
4102 		perVertexData.vert_i64d3_flat_1[3] = tcu::IVec3(1521, 1522, 1523);
4103 		perVertexData.vert_i64d2_flat_0[0] = tcu::IVec2(1531, 1532);
4104 		perVertexData.vert_i64d2_flat_0[1] = tcu::IVec2(1531, 1532);
4105 		perVertexData.vert_i64d2_flat_0[2] = tcu::IVec2(1531, 1532);
4106 		perVertexData.vert_i64d2_flat_0[3] = tcu::IVec2(1531, 1532);
4107 		perVertexData.vert_i64d2_flat_1[0] = tcu::IVec2(1541, 1542);
4108 		perVertexData.vert_i64d2_flat_1[1] = tcu::IVec2(1541, 1542);
4109 		perVertexData.vert_i64d2_flat_1[2] = tcu::IVec2(1541, 1542);
4110 		perVertexData.vert_i64d2_flat_1[3] = tcu::IVec2(1541, 1542);
4111 		perVertexData.vert_i64d1_flat_0[0] = 1551;
4112 		perVertexData.vert_i64d1_flat_0[1] = 1551;
4113 		perVertexData.vert_i64d1_flat_0[2] = 1551;
4114 		perVertexData.vert_i64d1_flat_0[3] = 1551;
4115 		perVertexData.vert_i64d1_flat_1[0] = 1561;
4116 		perVertexData.vert_i64d1_flat_1[1] = 1561;
4117 		perVertexData.vert_i64d1_flat_1[2] = 1561;
4118 		perVertexData.vert_i64d1_flat_1[3] = 1561;
4119 		perVertexData.vert_i32d4_flat_0[0] = tcu::IVec4(1571, 1572, 1573, 1574);
4120 		perVertexData.vert_i32d4_flat_0[1] = tcu::IVec4(1571, 1572, 1573, 1574);
4121 		perVertexData.vert_i32d4_flat_0[2] = tcu::IVec4(1571, 1572, 1573, 1574);
4122 		perVertexData.vert_i32d4_flat_0[3] = tcu::IVec4(1571, 1572, 1573, 1574);
4123 		perVertexData.vert_i32d4_flat_1[0] = tcu::IVec4(1581, 1582, 1583, 1584);
4124 		perVertexData.vert_i32d4_flat_1[1] = tcu::IVec4(1581, 1582, 1583, 1584);
4125 		perVertexData.vert_i32d4_flat_1[2] = tcu::IVec4(1581, 1582, 1583, 1584);
4126 		perVertexData.vert_i32d4_flat_1[3] = tcu::IVec4(1581, 1582, 1583, 1584);
4127 		perVertexData.vert_i32d3_flat_0[0] = tcu::IVec3(1591, 1592, 1593);
4128 		perVertexData.vert_i32d3_flat_0[1] = tcu::IVec3(1591, 1592, 1593);
4129 		perVertexData.vert_i32d3_flat_0[2] = tcu::IVec3(1591, 1592, 1593);
4130 		perVertexData.vert_i32d3_flat_0[3] = tcu::IVec3(1591, 1592, 1593);
4131 		perVertexData.vert_i32d3_flat_1[0] = tcu::IVec3(1601, 1602, 1603);
4132 		perVertexData.vert_i32d3_flat_1[1] = tcu::IVec3(1601, 1602, 1603);
4133 		perVertexData.vert_i32d3_flat_1[2] = tcu::IVec3(1601, 1602, 1603);
4134 		perVertexData.vert_i32d3_flat_1[3] = tcu::IVec3(1601, 1602, 1603);
4135 		perVertexData.vert_i32d2_flat_0[0] = tcu::IVec2(1611, 1612);
4136 		perVertexData.vert_i32d2_flat_0[1] = tcu::IVec2(1611, 1612);
4137 		perVertexData.vert_i32d2_flat_0[2] = tcu::IVec2(1611, 1612);
4138 		perVertexData.vert_i32d2_flat_0[3] = tcu::IVec2(1611, 1612);
4139 		perVertexData.vert_i32d2_flat_1[0] = tcu::IVec2(1621, 1622);
4140 		perVertexData.vert_i32d2_flat_1[1] = tcu::IVec2(1621, 1622);
4141 		perVertexData.vert_i32d2_flat_1[2] = tcu::IVec2(1621, 1622);
4142 		perVertexData.vert_i32d2_flat_1[3] = tcu::IVec2(1621, 1622);
4143 		perVertexData.vert_i32d1_flat_0[0] = 1631;
4144 		perVertexData.vert_i32d1_flat_0[1] = 1631;
4145 		perVertexData.vert_i32d1_flat_0[2] = 1631;
4146 		perVertexData.vert_i32d1_flat_0[3] = 1631;
4147 		perVertexData.vert_i32d1_flat_1[0] = 1641;
4148 		perVertexData.vert_i32d1_flat_1[1] = 1641;
4149 		perVertexData.vert_i32d1_flat_1[2] = 1641;
4150 		perVertexData.vert_i32d1_flat_1[3] = 1641;
4151 		perVertexData.vert_i16d4_flat_0[0] = tcu::IVec4(1651, 1652, 1653, 1654);
4152 		perVertexData.vert_i16d4_flat_0[1] = tcu::IVec4(1651, 1652, 1653, 1654);
4153 		perVertexData.vert_i16d4_flat_0[2] = tcu::IVec4(1651, 1652, 1653, 1654);
4154 		perVertexData.vert_i16d4_flat_0[3] = tcu::IVec4(1651, 1652, 1653, 1654);
4155 		perVertexData.vert_i16d4_flat_1[0] = tcu::IVec4(1661, 1662, 1663, 1664);
4156 		perVertexData.vert_i16d4_flat_1[1] = tcu::IVec4(1661, 1662, 1663, 1664);
4157 		perVertexData.vert_i16d4_flat_1[2] = tcu::IVec4(1661, 1662, 1663, 1664);
4158 		perVertexData.vert_i16d4_flat_1[3] = tcu::IVec4(1661, 1662, 1663, 1664);
4159 		perVertexData.vert_i16d3_flat_0[0] = tcu::IVec3(1671, 1672, 1673);
4160 		perVertexData.vert_i16d3_flat_0[1] = tcu::IVec3(1671, 1672, 1673);
4161 		perVertexData.vert_i16d3_flat_0[2] = tcu::IVec3(1671, 1672, 1673);
4162 		perVertexData.vert_i16d3_flat_0[3] = tcu::IVec3(1671, 1672, 1673);
4163 		perVertexData.vert_i16d3_flat_1[0] = tcu::IVec3(1681, 1682, 1683);
4164 		perVertexData.vert_i16d3_flat_1[1] = tcu::IVec3(1681, 1682, 1683);
4165 		perVertexData.vert_i16d3_flat_1[2] = tcu::IVec3(1681, 1682, 1683);
4166 		perVertexData.vert_i16d3_flat_1[3] = tcu::IVec3(1681, 1682, 1683);
4167 		perVertexData.vert_i16d2_flat_0[0] = tcu::IVec2(1691, 1692);
4168 		perVertexData.vert_i16d2_flat_0[1] = tcu::IVec2(1691, 1692);
4169 		perVertexData.vert_i16d2_flat_0[2] = tcu::IVec2(1691, 1692);
4170 		perVertexData.vert_i16d2_flat_0[3] = tcu::IVec2(1691, 1692);
4171 		perVertexData.vert_i16d2_flat_1[0] = tcu::IVec2(1701, 1702);
4172 		perVertexData.vert_i16d2_flat_1[1] = tcu::IVec2(1701, 1702);
4173 		perVertexData.vert_i16d2_flat_1[2] = tcu::IVec2(1701, 1702);
4174 		perVertexData.vert_i16d2_flat_1[3] = tcu::IVec2(1701, 1702);
4175 		perVertexData.vert_i16d1_flat_0[0] = 1711;
4176 		perVertexData.vert_i16d1_flat_0[1] = 1711;
4177 		perVertexData.vert_i16d1_flat_0[2] = 1711;
4178 		perVertexData.vert_i16d1_flat_0[3] = 1711;
4179 		perVertexData.vert_i16d1_flat_1[0] = 1721;
4180 		perVertexData.vert_i16d1_flat_1[1] = 1721;
4181 		perVertexData.vert_i16d1_flat_1[2] = 1721;
4182 		perVertexData.vert_i16d1_flat_1[3] = 1721;
4183 	}
4184 
4185 	InterfaceVariablesCase::PerPrimitiveData perPrimitiveData;
4186 	{
4187 		perPrimitiveData.prim_f64d4_flat_0[0] = tcu::Vec4(1011, 1012, 1013, 1014);
4188 		perPrimitiveData.prim_f64d4_flat_0[1] = tcu::Vec4(1011, 1012, 1013, 1014);
4189 		perPrimitiveData.prim_f64d4_flat_1[0] = tcu::Vec4(1021, 1022, 1023, 1024);
4190 		perPrimitiveData.prim_f64d4_flat_1[1] = tcu::Vec4(1021, 1022, 1023, 1024);
4191 		perPrimitiveData.prim_f64d3_flat_0[0] = tcu::Vec3(1031, 1032, 1033);
4192 		perPrimitiveData.prim_f64d3_flat_0[1] = tcu::Vec3(1031, 1032, 1033);
4193 		perPrimitiveData.prim_f64d3_flat_1[0] = tcu::Vec3(1041, 1042, 1043);
4194 		perPrimitiveData.prim_f64d3_flat_1[1] = tcu::Vec3(1041, 1042, 1043);
4195 		perPrimitiveData.prim_f64d2_flat_0[0] = tcu::Vec2(1051, 1052);
4196 		perPrimitiveData.prim_f64d2_flat_0[1] = tcu::Vec2(1051, 1052);
4197 		perPrimitiveData.prim_f64d2_flat_1[0] = tcu::Vec2(1061, 1062);
4198 		perPrimitiveData.prim_f64d2_flat_1[1] = tcu::Vec2(1061, 1062);
4199 		perPrimitiveData.prim_f64d1_flat_0[0] = 1071;
4200 		perPrimitiveData.prim_f64d1_flat_0[1] = 1071;
4201 		perPrimitiveData.prim_f64d1_flat_1[0] = 1081;
4202 		perPrimitiveData.prim_f64d1_flat_1[1] = 1081;
4203 		perPrimitiveData.prim_f32d4_flat_0[0] = tcu::Vec4(1091, 1092, 1093, 1094);
4204 		perPrimitiveData.prim_f32d4_flat_0[1] = tcu::Vec4(1091, 1092, 1093, 1094);
4205 		perPrimitiveData.prim_f32d4_flat_1[0] = tcu::Vec4(1101, 1102, 1103, 1104);
4206 		perPrimitiveData.prim_f32d4_flat_1[1] = tcu::Vec4(1101, 1102, 1103, 1104);
4207 		perPrimitiveData.prim_f32d3_flat_0[0] = tcu::Vec3(1111, 1112, 1113);
4208 		perPrimitiveData.prim_f32d3_flat_0[1] = tcu::Vec3(1111, 1112, 1113);
4209 		perPrimitiveData.prim_f32d3_flat_1[0] = tcu::Vec3(1121, 1122, 1123);
4210 		perPrimitiveData.prim_f32d3_flat_1[1] = tcu::Vec3(1121, 1122, 1123);
4211 		perPrimitiveData.prim_f32d2_flat_0[0] = tcu::Vec2(1131, 1132);
4212 		perPrimitiveData.prim_f32d2_flat_0[1] = tcu::Vec2(1131, 1132);
4213 		perPrimitiveData.prim_f32d2_flat_1[0] = tcu::Vec2(1141, 1142);
4214 		perPrimitiveData.prim_f32d2_flat_1[1] = tcu::Vec2(1141, 1142);
4215 		perPrimitiveData.prim_f32d1_flat_0[0] = 1151;
4216 		perPrimitiveData.prim_f32d1_flat_0[1] = 1151;
4217 		perPrimitiveData.prim_f32d1_flat_1[0] = 1161;
4218 		perPrimitiveData.prim_f32d1_flat_1[1] = 1161;
4219 		perPrimitiveData.prim_f16d4_flat_0[0] = tcu::Vec4(1171, 1172, 1173, 1174);
4220 		perPrimitiveData.prim_f16d4_flat_0[1] = tcu::Vec4(1171, 1172, 1173, 1174);
4221 		perPrimitiveData.prim_f16d4_flat_1[0] = tcu::Vec4(1181, 1182, 1183, 1184);
4222 		perPrimitiveData.prim_f16d4_flat_1[1] = tcu::Vec4(1181, 1182, 1183, 1184);
4223 		perPrimitiveData.prim_f16d3_flat_0[0] = tcu::Vec3(1191, 1192, 1193);
4224 		perPrimitiveData.prim_f16d3_flat_0[1] = tcu::Vec3(1191, 1192, 1193);
4225 		perPrimitiveData.prim_f16d3_flat_1[0] = tcu::Vec3(1201, 1202, 1203);
4226 		perPrimitiveData.prim_f16d3_flat_1[1] = tcu::Vec3(1201, 1202, 1203);
4227 		perPrimitiveData.prim_f16d2_flat_0[0] = tcu::Vec2(1211, 1212);
4228 		perPrimitiveData.prim_f16d2_flat_0[1] = tcu::Vec2(1211, 1212);
4229 		perPrimitiveData.prim_f16d2_flat_1[0] = tcu::Vec2(1221, 1222);
4230 		perPrimitiveData.prim_f16d2_flat_1[1] = tcu::Vec2(1221, 1222);
4231 		perPrimitiveData.prim_f16d1_flat_0[0] = 1231;
4232 		perPrimitiveData.prim_f16d1_flat_0[1] = 1231;
4233 		perPrimitiveData.prim_f16d1_flat_1[0] = 1241;
4234 		perPrimitiveData.prim_f16d1_flat_1[1] = 1241;
4235 		perPrimitiveData.prim_i64d4_flat_0[0] = tcu::IVec4(1251, 1252, 1253, 1254);
4236 		perPrimitiveData.prim_i64d4_flat_0[1] = tcu::IVec4(1251, 1252, 1253, 1254);
4237 		perPrimitiveData.prim_i64d4_flat_1[0] = tcu::IVec4(1261, 1262, 1263, 1264);
4238 		perPrimitiveData.prim_i64d4_flat_1[1] = tcu::IVec4(1261, 1262, 1263, 1264);
4239 		perPrimitiveData.prim_i64d3_flat_0[0] = tcu::IVec3(1271, 1272, 1273);
4240 		perPrimitiveData.prim_i64d3_flat_0[1] = tcu::IVec3(1271, 1272, 1273);
4241 		perPrimitiveData.prim_i64d3_flat_1[0] = tcu::IVec3(1281, 1282, 1283);
4242 		perPrimitiveData.prim_i64d3_flat_1[1] = tcu::IVec3(1281, 1282, 1283);
4243 		perPrimitiveData.prim_i64d2_flat_0[0] = tcu::IVec2(1291, 1292);
4244 		perPrimitiveData.prim_i64d2_flat_0[1] = tcu::IVec2(1291, 1292);
4245 		perPrimitiveData.prim_i64d2_flat_1[0] = tcu::IVec2(1301, 1302);
4246 		perPrimitiveData.prim_i64d2_flat_1[1] = tcu::IVec2(1301, 1302);
4247 		perPrimitiveData.prim_i64d1_flat_0[0] = 1311;
4248 		perPrimitiveData.prim_i64d1_flat_0[1] = 1311;
4249 		perPrimitiveData.prim_i64d1_flat_1[0] = 1321;
4250 		perPrimitiveData.prim_i64d1_flat_1[1] = 1321;
4251 		perPrimitiveData.prim_i32d4_flat_0[0] = tcu::IVec4(1331, 1332, 1333, 1334);
4252 		perPrimitiveData.prim_i32d4_flat_0[1] = tcu::IVec4(1331, 1332, 1333, 1334);
4253 		perPrimitiveData.prim_i32d4_flat_1[0] = tcu::IVec4(1341, 1342, 1343, 1344);
4254 		perPrimitiveData.prim_i32d4_flat_1[1] = tcu::IVec4(1341, 1342, 1343, 1344);
4255 		perPrimitiveData.prim_i32d3_flat_0[0] = tcu::IVec3(1351, 1352, 1353);
4256 		perPrimitiveData.prim_i32d3_flat_0[1] = tcu::IVec3(1351, 1352, 1353);
4257 		perPrimitiveData.prim_i32d3_flat_1[0] = tcu::IVec3(1361, 1362, 1363);
4258 		perPrimitiveData.prim_i32d3_flat_1[1] = tcu::IVec3(1361, 1362, 1363);
4259 		perPrimitiveData.prim_i32d2_flat_0[0] = tcu::IVec2(1371, 1372);
4260 		perPrimitiveData.prim_i32d2_flat_0[1] = tcu::IVec2(1371, 1372);
4261 		perPrimitiveData.prim_i32d2_flat_1[0] = tcu::IVec2(1381, 1382);
4262 		perPrimitiveData.prim_i32d2_flat_1[1] = tcu::IVec2(1381, 1382);
4263 		perPrimitiveData.prim_i32d1_flat_0[0] = 1391;
4264 		perPrimitiveData.prim_i32d1_flat_0[1] = 1391;
4265 		perPrimitiveData.prim_i32d1_flat_1[0] = 1401;
4266 		perPrimitiveData.prim_i32d1_flat_1[1] = 1401;
4267 		perPrimitiveData.prim_i16d4_flat_0[0] = tcu::IVec4(1411, 1412, 1413, 1414);
4268 		perPrimitiveData.prim_i16d4_flat_0[1] = tcu::IVec4(1411, 1412, 1413, 1414);
4269 		perPrimitiveData.prim_i16d4_flat_1[0] = tcu::IVec4(1421, 1422, 1423, 1424);
4270 		perPrimitiveData.prim_i16d4_flat_1[1] = tcu::IVec4(1421, 1422, 1423, 1424);
4271 		perPrimitiveData.prim_i16d3_flat_0[0] = tcu::IVec3(1431, 1432, 1433);
4272 		perPrimitiveData.prim_i16d3_flat_0[1] = tcu::IVec3(1431, 1432, 1433);
4273 		perPrimitiveData.prim_i16d3_flat_1[0] = tcu::IVec3(1441, 1442, 1443);
4274 		perPrimitiveData.prim_i16d3_flat_1[1] = tcu::IVec3(1441, 1442, 1443);
4275 		perPrimitiveData.prim_i16d2_flat_0[0] = tcu::IVec2(1451, 1452);
4276 		perPrimitiveData.prim_i16d2_flat_0[1] = tcu::IVec2(1451, 1452);
4277 		perPrimitiveData.prim_i16d2_flat_1[0] = tcu::IVec2(1461, 1462);
4278 		perPrimitiveData.prim_i16d2_flat_1[1] = tcu::IVec2(1461, 1462);
4279 		perPrimitiveData.prim_i16d1_flat_0[0] = 1471;
4280 		perPrimitiveData.prim_i16d1_flat_0[1] = 1471;
4281 		perPrimitiveData.prim_i16d1_flat_1[0] = 1481;
4282 		perPrimitiveData.prim_i16d1_flat_1[1] = 1481;
4283 	}
4284 
4285 	// Create and fill buffers with this data.
4286 	const auto			pvdSize		= static_cast<VkDeviceSize>(sizeof(perVertexData));
4287 	const auto			pvdInfo		= makeBufferCreateInfo(pvdSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4288 	BufferWithMemory	pvdData		(vkd, device, alloc, pvdInfo, MemoryRequirement::HostVisible);
4289 	auto&				pvdAlloc	= pvdData.getAllocation();
4290 	void*				pvdPtr		= pvdAlloc.getHostPtr();
4291 
4292 	const auto			ppdSize		= static_cast<VkDeviceSize>(sizeof(perPrimitiveData));
4293 	const auto			ppdInfo		= makeBufferCreateInfo(ppdSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4294 	BufferWithMemory	ppdData		(vkd, device, alloc, ppdInfo, MemoryRequirement::HostVisible);
4295 	auto&				ppdAlloc	= ppdData.getAllocation();
4296 	void*				ppdPtr		= ppdAlloc.getHostPtr();
4297 
4298 	deMemcpy(pvdPtr, &perVertexData, sizeof(perVertexData));
4299 	deMemcpy(ppdPtr, &perPrimitiveData, sizeof(perPrimitiveData));
4300 
4301 	flushAlloc(vkd, device, pvdAlloc);
4302 	flushAlloc(vkd, device, ppdAlloc);
4303 
4304 	// Descriptor set layout.
4305 	DescriptorSetLayoutBuilder setLayoutBuilder;
4306 	setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, bufStages);
4307 	setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, bufStages);
4308 	const auto setLayout = setLayoutBuilder.build(vkd, device);
4309 
4310 	// Create and update descriptor set.
4311 	DescriptorPoolBuilder descriptorPoolBuilder;
4312 	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u);
4313 	const auto descriptorPool	= descriptorPoolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
4314 	const auto descriptorSet	= makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
4315 
4316 	DescriptorSetUpdateBuilder updateBuilder;
4317 	const auto pvdBufferInfo = makeDescriptorBufferInfo(pvdData.get(), 0ull, pvdSize);
4318 	const auto ppdBufferInfo = makeDescriptorBufferInfo(ppdData.get(), 0ull, ppdSize);
4319 	updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &pvdBufferInfo);
4320 	updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &ppdBufferInfo);
4321 	updateBuilder.update(vkd, device);
4322 
4323 	// Pipeline layout.
4324 	const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get());
4325 
4326 	// Shader modules.
4327 	const auto	meshShader	= createShaderModule(vkd, device, binaries.get("mesh"));
4328 	const auto	fragShader	= createShaderModule(vkd, device, binaries.get("frag"));
4329 
4330 	Move<VkShaderModule> taskShader;
4331 	if (hasTask)
4332 		taskShader = createShaderModule(vkd, device, binaries.get("task"));
4333 
4334 	// Render pass.
4335 	const auto renderPass = makeRenderPass(vkd, device, imageFormat);
4336 
4337 	// Framebuffer.
4338 	const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);
4339 
4340 	// Viewport and scissor.
4341 	const auto						topHalf		= makeViewport(imageExtent.width, imageExtent.height / 2u);
4342 	const std::vector<VkViewport>	viewports	{ makeViewport(imageExtent), topHalf };
4343 	const std::vector<VkRect2D>		scissors	(2u, makeRect2D(imageExtent));
4344 
4345 	const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
4346 		taskShader.get(), meshShader.get(), fragShader.get(),
4347 		renderPass.get(), viewports, scissors);
4348 
4349 	// Command pool and buffer.
4350 	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
4351 	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4352 	const auto cmdBuffer	= cmdBufferPtr.get();
4353 
4354 	beginCommandBuffer(vkd, cmdBuffer);
4355 
4356 	// Run pipeline.
4357 	const tcu::Vec4	clearColor	(0.0f, 0.0f, 0.0f, 0.0f);
4358 	const auto		drawCount	= m_params->drawCount();
4359 	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
4360 	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
4361 	vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
4362 	vkd.cmdDrawMeshTasksNV(cmdBuffer, drawCount, 0u);
4363 	endRenderPass(vkd, cmdBuffer);
4364 
4365 	// Copy color buffer to verification buffer.
4366 	const auto colorAccess		= (VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT);
4367 	const auto transferRead		= VK_ACCESS_TRANSFER_READ_BIT;
4368 	const auto transferWrite	= VK_ACCESS_TRANSFER_WRITE_BIT;
4369 	const auto hostRead			= VK_ACCESS_HOST_READ_BIT;
4370 
4371 	const auto preCopyBarrier	= makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
4372 	const auto postCopyBarrier	= makeMemoryBarrier(transferWrite, hostRead);
4373 	const auto copyRegion		= makeBufferImageCopy(imageExtent, colorSRL);
4374 
4375 	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
4376 	vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, verificationBuffer.get(), 1u, &copyRegion);
4377 	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postCopyBarrier, 0u, nullptr, 0u, nullptr);
4378 
4379 	endCommandBuffer(vkd, cmdBuffer);
4380 	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
4381 
4382 	// Generate reference image and compare results.
4383 	const tcu::IVec3					iExtent				(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
4384 	const tcu::ConstPixelBufferAccess	verificationAccess	(tcuFormat, iExtent, verificationBufferData);
4385 
4386 	generateReferenceLevel();
4387 	invalidateAlloc(vkd, device, verificationBufferAlloc);
4388 	if (!verifyResult(verificationAccess))
4389 		TCU_FAIL("Result does not match reference; check log for details");
4390 
4391 	return tcu::TestStatus::pass("Pass");
4392 }
4393 
checkMeshSupport(Context & context)4394 void checkMeshSupport (Context& context)
4395 {
4396 	checkTaskMeshShaderSupportNV(context, false, true);
4397 }
4398 
initMixedPipelinesPrograms(vk::SourceCollections & programCollection)4399 void initMixedPipelinesPrograms (vk::SourceCollections& programCollection)
4400 {
4401 	std::ostringstream frag;
4402 	frag
4403 		<< "#version 450\n"
4404 		<< "\n"
4405 		<< "layout (location=0) in  vec4 inColor;\n"
4406 		<< "layout (location=0) out vec4 outColor;\n"
4407 		<< "\n"
4408 		<< "void main ()\n"
4409 		<< "{\n"
4410 		<< "    outColor = inColor;\n"
4411 		<< "}\n"
4412 		;
4413 	programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
4414 
4415 	const std::string pushConstantDecl =
4416 		"layout (push_constant, std430) uniform PushConstantBlock {\n"
4417 		"    vec4 color;\n"
4418 		"    uint firstVertex;\n"
4419 		"} pc;\n"
4420 		;
4421 
4422 	// The normal pipeline will have a binding with the vertex position and will take the vertex color from the push constants.
4423 	std::ostringstream vert;
4424 	vert
4425 		<< "#version 450\n"
4426 		<< "\n"
4427 		<< pushConstantDecl
4428 		<< "layout (location=0) out vec4 outColor;\n"
4429 		<< "layout (location=0) in  vec4 inPos;\n"
4430 		<< "\n"
4431 		<< "void main ()\n"
4432 		<< "{\n"
4433 		<< "    gl_Position = inPos;\n"
4434 		<< "    outColor    = pc.color;\n"
4435 		<< "}\n"
4436 		;
4437 	programCollection.glslSources.add("vert") << glu::VertexSource(vert.str());
4438 
4439 	// The mesh pipeline will emit a quad based on the first vertex as indicated by the push constants, using the push constant color as well.
4440 	std::ostringstream mesh;
4441 	mesh
4442 		<< "#version 450\n"
4443 		<< "#extension GL_NV_mesh_shader : enable\n"
4444 		<< "\n"
4445 		<< pushConstantDecl
4446 		<< "\n"
4447 		<< "layout (local_size_x=2) in;\n"
4448 		<< "layout (triangles) out;\n"
4449 		<< "layout (max_vertices=4, max_primitives=2) out;\n"
4450 		<< "\n"
4451 		<< "layout (location=0) out vec4 outColor[];\n"
4452 		<< "\n"
4453 		<< "layout (set=0, binding=0) readonly buffer VertexBlock {\n"
4454 		<< "    vec4 positions[];\n"
4455 		<< "} vertexData;\n"
4456 		<< "\n"
4457 		<< "void main ()\n"
4458 		<< "{\n"
4459 		<< "    // Emit 4 vertices starting at firstVertex, 2 per invocation.\n"
4460 		<< "    gl_PrimitiveCountNV = 2u;\n"
4461 		<< "    \n"
4462 		<< "    const uint localVertexOffset = 2u * gl_LocalInvocationIndex;\n"
4463 		<< "    const uint firstLocalVertex  = pc.firstVertex + localVertexOffset;\n"
4464 		<< "    const uint localIndexOffset  = 3u * gl_LocalInvocationIndex;\n"
4465 		<< "\n"
4466 		<< "    for (uint i = 0; i < 2; ++i)\n"
4467 		<< "    {\n"
4468 		<< "        gl_MeshVerticesNV[localVertexOffset + i].gl_Position = vertexData.positions[firstLocalVertex + i];\n"
4469 		<< "        outColor[localVertexOffset + i] = pc.color;\n"
4470 		<< "    }\n"
4471 		<< "\n"
4472 		<< "    // Emit 2 primitives, 1 per invocation.\n"
4473 		<< "    const uint indices[] = uint[](0, 1, 2, 2, 1, 3);\n"
4474 		<< "\n"
4475 		<< "    for (uint i = 0; i < 3; ++i)\n"
4476 		<< "    {\n"
4477 		<< "        const uint pos = localIndexOffset + i;\n"
4478 		<< "        gl_PrimitiveIndicesNV[pos] = indices[pos];\n"
4479 		<< "    }\n"
4480 		<< "}\n"
4481 		;
4482 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
4483 }
4484 
testMixedPipelines(Context & context)4485 tcu::TestStatus testMixedPipelines (Context& context)
4486 {
4487 	const auto&			vkd			= context.getDeviceInterface();
4488 	const auto			device		= context.getDevice();
4489 	auto&				alloc		= context.getDefaultAllocator();
4490 	const auto			queue		= context.getUniversalQueue();
4491 	const auto			qIndex		= context.getUniversalQueueFamilyIndex();
4492 
4493 	const auto			colorFormat	= getOutputFormat();
4494 	const auto			colorExtent	= makeExtent3D(32u, 32u, 1u);
4495 	const auto			colorUsage	= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
4496 	const auto			tcuFormat	= mapVkFormat(colorFormat);
4497 	const tcu::IVec3	iExtent		(static_cast<int>(colorExtent.width), static_cast<int>(colorExtent.height), static_cast<int>(colorExtent.depth));
4498 	const tcu::Vec4		clearValue	(0.0f, 0.0f, 0.0f, 1.0f);
4499 
4500 	// Divide the image in 4 quadrants and emit a "full-screen" quad (2 triangles) in each quadrant, using a mesh or normal pipeline.
4501 	// Replicate a standard quad 4 times with different offsets in X and Y for each quadrant.
4502 
4503 	// Triangle vertices for a single full-screen quad.
4504 	const std::vector<tcu::Vec4> stdQuad
4505 	{
4506 		tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f),
4507 		tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f),
4508 		tcu::Vec4(0.0f, 1.0f, 0.0f, 1.0f),
4509 		tcu::Vec4(1.0f, 1.0f, 0.0f, 1.0f),
4510 	};
4511 
4512 	// Offsets for each quadrant.
4513 	const std::vector<tcu::Vec4> quadrantOffsets
4514 	{
4515 		tcu::Vec4(-1.0f, -1.0f, 0.0f, 0.0f),		// Top left.
4516 		tcu::Vec4( 0.0f, -1.0f, 0.0f, 0.0f),		// Top right.
4517 		tcu::Vec4(-1.0f,  0.0f, 0.0f, 0.0f),		// Bottom left.
4518 		tcu::Vec4( 0.0f,  0.0f, 0.0f, 0.0f),		// Bottom right.
4519 	};
4520 
4521 	// Colors for each quadrant.
4522 	const std::vector<tcu::Vec4> quadrantColors
4523 	{
4524 		tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f),
4525 		tcu::Vec4(1.0f, 1.0f, 0.0f, 1.0f),
4526 		tcu::Vec4(1.0f, 0.0f, 1.0f, 1.0f),
4527 		tcu::Vec4(0.0f, 1.0f, 1.0f, 1.0f),
4528 	};
4529 
4530 	DE_ASSERT(quadrantOffsets.size() == quadrantColors.size());
4531 
4532 	// Fill the vertex buffer.
4533 	const auto				numVertices			= stdQuad.size() * quadrantOffsets.size();
4534 	std::vector<tcu::Vec4>	vertexBufferSrc;
4535 
4536 	vertexBufferSrc.reserve(numVertices);
4537 	for (size_t quadrantIdx = 0; quadrantIdx < quadrantOffsets.size(); ++quadrantIdx)
4538 	{
4539 		const auto& quadrantOffset = quadrantOffsets[quadrantIdx];
4540 
4541 		for (size_t vertexIdx = 0; vertexIdx < stdQuad.size(); ++vertexIdx)
4542 		{
4543 			const tcu::Vec4 pos = stdQuad[vertexIdx] + quadrantOffset;
4544 			vertexBufferSrc.push_back(pos);
4545 		}
4546 	}
4547 
4548 	const auto			vertexBufferSize	= de::dataSize(vertexBufferSrc);
4549 	const auto			vertexBufferUsage	= (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
4550 	const auto			vertexBufferInfo	= makeBufferCreateInfo(vertexBufferSize, vertexBufferUsage);
4551 	BufferWithMemory	vertexBuffer		(vkd, device, alloc, vertexBufferInfo, MemoryRequirement::HostVisible);
4552 	auto&				vertexBufferAlloc	= vertexBuffer.getAllocation();
4553 	tcu::Vec4*			vertexBufferData	= reinterpret_cast<tcu::Vec4*>(vertexBufferAlloc.getHostPtr());
4554 
4555 	deMemcpy(vertexBufferData, vertexBufferSrc.data(), vertexBufferSize);
4556 	flushAlloc(vkd, device, vertexBufferAlloc);
4557 
4558 	// Index buffer, only used for the classic pipeline.
4559 	const std::vector<uint32_t> vertexIndices {0u, 1u, 2u, 2u, 1u, 3u};
4560 
4561 	const auto indexBufferSize	= de::dataSize(vertexIndices);
4562 	const auto indexBufferUsage	= VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
4563 	const auto indexBufferInfo	= makeBufferCreateInfo(indexBufferSize, indexBufferUsage);
4564 
4565 	BufferWithMemory	indexBuffer			(vkd, device, alloc, indexBufferInfo, MemoryRequirement::HostVisible);
4566 	auto&				indexBufferAlloc	= indexBuffer.getAllocation();
4567 	void*				indexBufferData		= indexBufferAlloc.getHostPtr();
4568 
4569 	deMemcpy(indexBufferData, vertexIndices.data(), indexBufferSize);
4570 	flushAlloc(vkd, device, indexBufferAlloc);
4571 
4572 	// Color attachment.
4573 	const VkImageCreateInfo colorAttachmentInfo =
4574 	{
4575 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
4576 		nullptr,								//	const void*				pNext;
4577 		0u,										//	VkImageCreateFlags		flags;
4578 		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
4579 		colorFormat,							//	VkFormat				format;
4580 		colorExtent,							//	VkExtent3D				extent;
4581 		1u,										//	uint32_t				mipLevels;
4582 		1u,										//	uint32_t				arrayLayers;
4583 		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
4584 		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
4585 		colorUsage,								//	VkImageUsageFlags		usage;
4586 		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
4587 		0u,										//	uint32_t				queueFamilyIndexCount;
4588 		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
4589 		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
4590 	};
4591 	ImageWithMemory	colorAttachment	(vkd, device, alloc, colorAttachmentInfo, MemoryRequirement::Any);
4592 	const auto		colorSRR		= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
4593 	const auto		colorView		= makeImageView(vkd, device, colorAttachment.get(), VK_IMAGE_VIEW_TYPE_2D, colorFormat, colorSRR);
4594 
4595 	// Verification buffer.
4596 	const auto			verificationBufferSize		= tcu::getPixelSize(tcuFormat) * iExtent.x() * iExtent.y() * iExtent.z();
4597 	const auto			verificationBufferSizeSz	= static_cast<VkDeviceSize>(verificationBufferSize);
4598 	const auto			verificationBufferInfo		= makeBufferCreateInfo(verificationBufferSizeSz, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
4599 	BufferWithMemory	verificationBuffer			(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
4600 	auto&				verificationBufferAlloc		= verificationBuffer.getAllocation();
4601 	void*				verificationBufferData		= verificationBufferAlloc.getHostPtr();
4602 
4603 	// Render pass and framebuffer.
4604 	const auto renderPass	= makeRenderPass(vkd, device, colorFormat);
4605 	const auto framebuffer	= makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), colorExtent.width, colorExtent.height);
4606 
4607 	// Push constant range.
4608 	struct PushConstantBlock
4609 	{
4610 		tcu::Vec4	color;
4611 		uint32_t	firstVertex;
4612 	};
4613 
4614 	const auto pcSize	= static_cast<uint32_t>(sizeof(PushConstantBlock));
4615 	const auto pcStages	= (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_MESH_BIT_EXT);
4616 	const auto pcRange	= makePushConstantRange(pcStages, 0u, pcSize);
4617 
4618 	// No descriptor set layout for the classic pipeline.
4619 	// Descriptor set layout for the mesh pipeline using the vertex buffer.
4620 	DescriptorSetLayoutBuilder dsLayoutBuilder;
4621 	dsLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_MESH_BIT_EXT);
4622 	const auto meshDSLayout = dsLayoutBuilder.build(vkd, device);
4623 
4624 	// Pipeline layout for the classic pipeline.
4625 	const auto classicPipelineLayout = makePipelineLayout(vkd, device, 0u, nullptr, 1u, &pcRange);
4626 
4627 	// Pipeline layout for the mesh pipeline.
4628 	const auto meshPipelineLayout = makePipelineLayout(vkd, device, 1u, &meshDSLayout.get(), 1u, &pcRange);
4629 
4630 	// Descriptor pool and set with the vertex buffer.
4631 	DescriptorPoolBuilder poolBuilder;
4632 	poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
4633 	const auto descriptorPool		= poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
4634 	const auto meshDescriptorSet	= makeDescriptorSet(vkd, device, descriptorPool.get(), meshDSLayout.get());
4635 
4636 	DescriptorSetUpdateBuilder updateBuilder;
4637 	const auto vertexBufferDescInfo = makeDescriptorBufferInfo(vertexBuffer.get(), 0ull, vertexBufferSize);
4638 	updateBuilder.writeSingle(meshDescriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &vertexBufferDescInfo);
4639 	updateBuilder.update(vkd, device);
4640 
4641 	// Shaders and pipelines.
4642 	const auto&	binaries	= context.getBinaryCollection();
4643 	const auto	vertModule	= createShaderModule(vkd, device, binaries.get("vert"));
4644 	const auto	meshModule	= createShaderModule(vkd, device, binaries.get("mesh"));
4645 	const auto	fragModule	= createShaderModule(vkd, device, binaries.get("frag"));
4646 
4647 	const std::vector<VkViewport>	viewports	(1u, makeViewport(colorExtent));
4648 	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(colorExtent));
4649 
4650 	const auto classicPipeline = makeGraphicsPipeline(vkd, device, classicPipelineLayout.get(),
4651 		vertModule.get(), DE_NULL, DE_NULL, DE_NULL, fragModule.get(), renderPass.get(), viewports, scissors);
4652 
4653 	const auto meshPipeline = makeGraphicsPipeline(vkd, device, meshPipelineLayout.get(),
4654 		DE_NULL, meshModule.get(), fragModule.get(), renderPass.get(), viewports, scissors);
4655 
4656 	// Command pool and buffer.
4657 	const auto cmdPool		= makeCommandPool(vkd, device, qIndex);
4658 	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4659 	const auto cmdBuffer	= cmdBufferPtr.get();
4660 
4661 	beginCommandBuffer(vkd, cmdBuffer);
4662 	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0), clearValue);
4663 
4664 	// Draw a triangle quad in each of the 4 image quadrants.
4665 	PushConstantBlock pcData;
4666 
4667 	for (size_t quadrantIdx = 0; quadrantIdx < quadrantColors.size(); ++quadrantIdx)
4668 	{
4669 		pcData.color				= quadrantColors[quadrantIdx];
4670 		pcData.firstVertex			= static_cast<uint32_t>(quadrantIdx * stdQuad.size());
4671 		const auto vOffset			= static_cast<VkDeviceSize>(pcData.firstVertex * sizeof(tcu::Vec4));
4672 		const bool isMeshQuadrant	= (quadrantIdx % 2u == 0u);
4673 
4674 		if (isMeshQuadrant)
4675 		{
4676 			vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, meshPipeline.get());
4677 			vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, meshPipelineLayout.get(), 0u, 1u, &meshDescriptorSet.get(), 0u, nullptr);
4678 			vkd.cmdPushConstants(cmdBuffer, meshPipelineLayout.get(), pcStages, 0u, pcSize, &pcData);
4679 			vkd.cmdDrawMeshTasksNV(cmdBuffer, 1u, 0u);
4680 		}
4681 		else
4682 		{
4683 			vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, classicPipeline.get());
4684 			vkd.cmdBindVertexBuffers(cmdBuffer, 0u, 1u, &vertexBuffer.get(), &vOffset);
4685 			vkd.cmdBindIndexBuffer(cmdBuffer, indexBuffer.get(), 0ull, VK_INDEX_TYPE_UINT32);
4686 			vkd.cmdPushConstants(cmdBuffer, classicPipelineLayout.get(), pcStages, 0u, pcSize, &pcData);
4687 			vkd.cmdDrawIndexed(cmdBuffer, static_cast<uint32_t>(vertexIndices.size()), 1u, 0u, 0, 0u);
4688 		}
4689 	}
4690 
4691 	endRenderPass(vkd, cmdBuffer);
4692 
4693 	copyImageToBuffer(vkd, cmdBuffer, colorAttachment.get(), verificationBuffer.get(), tcu::IVec2(iExtent.x(), iExtent.y()));
4694 	endCommandBuffer(vkd, cmdBuffer);
4695 	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
4696 
4697 	invalidateAlloc(vkd, device, verificationBufferAlloc);
4698 
4699 	// Prepare a reference image with the quadrant colors.
4700 	tcu::TextureLevel	refLevel	(tcuFormat, iExtent.x(), iExtent.y(), iExtent.z());
4701 	auto				refAccess	= refLevel.getAccess();
4702 	const tcu::Vec4		halfSize	(static_cast<float>(iExtent.x()) / 2.0f, static_cast<float>(iExtent.y()) / 2.0f, 0, 0);
4703 	const tcu::Vec4		fbOffset	(-1.0f, -1.0f, 0.0f, 0.0f);
4704 
4705 	for (size_t quadrantIdx = 0; quadrantIdx < quadrantOffsets.size(); ++quadrantIdx)
4706 	{
4707 		const auto&	offset		= quadrantOffsets[quadrantIdx];
4708 		const auto	absOffset	= (offset - fbOffset) * halfSize;
4709 		const auto	subregion	= tcu::getSubregion(refAccess,
4710 			static_cast<int>(absOffset.x()),
4711 			static_cast<int>(absOffset.y()),
4712 			static_cast<int>(halfSize.x()),
4713 			static_cast<int>(halfSize.y()));
4714 
4715 		tcu::clear(subregion, quadrantColors.at(quadrantIdx));
4716 	}
4717 
4718 	auto&								log			= context.getTestContext().getLog();
4719 	const tcu::ConstPixelBufferAccess	resAccess	(tcuFormat, iExtent, verificationBufferData);
4720 	const tcu::Vec4						threshold	(0.0f, 0.0f, 0.0f, 0.0f); // The chosen colors should need no threshold. They can be represented exactly.
4721 
4722 	if (!tcu::floatThresholdCompare(log, "TestResult", "", refAccess, resAccess, threshold, tcu::COMPARE_LOG_ON_ERROR))
4723 		TCU_FAIL("Check log for details");
4724 
4725 	return tcu::TestStatus::pass("Pass");
4726 }
4727 
4728 // Test reading the gl_TaskCountNV and gl_PrimitiveCountNV built-ins from several invocations.
4729 class CountReadCase : public MeshShaderMiscCase
4730 {
4731 public:
CountReadCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,ParamsPtr params)4732 					CountReadCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
4733 						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
4734 					{}
4735 
4736 	void			initPrograms	(vk::SourceCollections& programCollection) const override;
4737 	TestInstance*	createInstance	(Context& context) const override;
4738 
4739 	static constexpr uint32_t kLocalSize = 32u;
4740 };
4741 
4742 class CountReadInstance : public MeshShaderMiscInstance
4743 {
4744 public:
CountReadInstance(Context & context,const MiscTestParams * params)4745 	CountReadInstance (Context& context, const MiscTestParams* params)
4746 		: MeshShaderMiscInstance (context, params)
4747 	{}
4748 
4749 	void generateReferenceLevel () override;
4750 };
4751 
createInstance(Context & context) const4752 TestInstance* CountReadCase::createInstance (Context& context) const
4753 {
4754 	return new CountReadInstance(context, m_params.get());
4755 }
4756 
generateReferenceLevel()4757 void CountReadInstance::generateReferenceLevel ()
4758 {
4759 	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
4760 }
4761 
initPrograms(vk::SourceCollections & programCollection) const4762 void CountReadCase::initPrograms (vk::SourceCollections& programCollection) const
4763 {
4764 	DE_ASSERT(m_params->needsTaskShader());
4765 	DE_ASSERT(m_params->height == m_params->meshCount);
4766 	DE_ASSERT(m_params->width == kLocalSize);
4767 
4768 	std::ostringstream taskDataDeclStream;
4769 	taskDataDeclStream
4770 		<< "taskNV TaskData {\n"
4771 		<< "    vec4 color[" << kLocalSize << "];\n"
4772 		<< "} td;\n"
4773 		;
4774 	const auto taskDataDecl = taskDataDeclStream.str();
4775 
4776 	std::ostringstream task;
4777 	task
4778 		<< "#version 450\n"
4779 		<< "#extension GL_NV_mesh_shader : enable\n"
4780 		<< "\n"
4781 		<< "layout(local_size_x=" << kLocalSize << ") in;\n"
4782 		<< "\n"
4783 		<< "out " << taskDataDecl
4784 		<< "void main ()\n"
4785 		<< "{\n"
4786 		<< "    gl_TaskCountNV = 0u;\n"
4787 		<< "    if (gl_LocalInvocationIndex == 0u) {\n"
4788 		<< "        gl_TaskCountNV = " << m_params->meshCount << ";\n"
4789 		<< "    }\n"
4790 		<< "    memoryBarrierShared();\n"
4791 		<< "    barrier();\n"
4792 		<< "    td.color[gl_LocalInvocationIndex] = ((gl_TaskCountNV == " << m_params->meshCount << ") ? vec4(0.0, 0.0, 1.0, 1.0) : vec4(0.0, 0.0, 0.0, 1.0));\n"
4793 		<< "}\n"
4794 		;
4795 	programCollection.glslSources.add("task") << glu::TaskSource(task.str());
4796 
4797 	std::ostringstream mesh;
4798 	mesh
4799 		<< "#version 450\n"
4800 		<< "#extension GL_NV_mesh_shader : enable\n"
4801 		<< "\n"
4802 		<< "in " << taskDataDecl
4803 		<< "\n"
4804 		<< "layout (local_size_x=" << kLocalSize << ") in;\n"
4805 		<< "layout (points) out;\n"
4806 		<< "layout (max_vertices=" << kLocalSize << ", max_primitives=" << kLocalSize << ") out;\n"
4807 		<< "\n"
4808 		<< "layout (location=0) out perprimitiveNV vec4 pointColor[];\n"
4809 		<< "\n"
4810 		<< "void main ()\n"
4811 		<< "{\n"
4812 		<< "    gl_PrimitiveCountNV = 0u;\n"
4813 		<< "    if (gl_LocalInvocationIndex == 0u) {\n"
4814 		<< "        gl_PrimitiveCountNV = " << kLocalSize << ";\n"
4815 		<< "    }\n"
4816 		<< "    memoryBarrierShared();\n"
4817 		<< "    barrier();\n"
4818 		<< "\n"
4819 		<< "    const vec4  color  = ((gl_PrimitiveCountNV == " << kLocalSize << ") ? td.color[gl_LocalInvocationIndex] : vec4(0.0, 0.0, 0.0, 1.0));\n"
4820 		<< "    const float xCoord = (((float(gl_LocalInvocationIndex) + 0.5) / " << m_params->width << ") * 2.0 - 1.0);\n"
4821 		<< "    const float yCoord = (((float(gl_WorkGroupID.x) + 0.5) / " << m_params->height << ") * 2.0 - 1.0);\n"
4822 		<< "\n"
4823 		<< "    gl_MeshVerticesNV[gl_LocalInvocationIndex].gl_Position = vec4(xCoord, yCoord, 0.0, 1.0);\n"
4824 		<< "    gl_PrimitiveIndicesNV[gl_LocalInvocationIndex] = gl_LocalInvocationIndex;\n"
4825 		<< "    pointColor[gl_LocalInvocationIndex] = color;\n"
4826 		<< "}\n"
4827 		;
4828 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
4829 
4830 	// Default fragment shader.
4831 	MeshShaderMiscCase::initPrograms(programCollection);
4832 }
4833 
4834 } // anonymous namespace
4835 
createMeshShaderMiscTests(tcu::TestContext & testCtx)4836 tcu::TestCaseGroup* createMeshShaderMiscTests (tcu::TestContext& testCtx)
4837 {
4838 	GroupPtr miscTests (new tcu::TestCaseGroup(testCtx, "misc", "Mesh Shader Misc Tests"));
4839 
4840 	{
4841 		ParamsPtr paramsPtr (new MiscTestParams(
4842 			/*taskCount*/	tcu::just(2u),
4843 			/*meshCount*/	2u,
4844 			/*width*/		8u,
4845 			/*height*/		8u));
4846 
4847 		miscTests->addChild(new ComplexTaskDataCase(testCtx, "complex_task_data", "Pass a complex structure from the task to the mesh shader", std::move(paramsPtr)));
4848 	}
4849 
4850 	{
4851 		ParamsPtr paramsPtr (new MiscTestParams(
4852 			/*taskCount*/	tcu::Nothing,
4853 			/*meshCount*/	1u,
4854 			/*width*/		5u,		// Use an odd value so there's a pixel in the exact center.
4855 			/*height*/		7u));	// Idem.
4856 
4857 		miscTests->addChild(new SinglePointCase(testCtx, "single_point", "Draw a single point", std::move(paramsPtr)));
4858 	}
4859 
4860 	{
4861 		ParamsPtr paramsPtr (new MiscTestParams(
4862 			/*taskCount*/	tcu::Nothing,
4863 			/*meshCount*/	1u,
4864 			/*width*/		8u,
4865 			/*height*/		5u));	// Use an odd value so there's a center line.
4866 
4867 		miscTests->addChild(new SingleLineCase(testCtx, "single_line", "Draw a single line", std::move(paramsPtr)));
4868 	}
4869 
4870 	{
4871 		ParamsPtr paramsPtr (new MiscTestParams(
4872 			/*taskCount*/	tcu::Nothing,
4873 			/*meshCount*/	1u,
4874 			/*width*/		5u,		// Use an odd value so there's a pixel in the exact center.
4875 			/*height*/		7u));	// Idem.
4876 
4877 		miscTests->addChild(new SingleTriangleCase(testCtx, "single_triangle", "Draw a single triangle", std::move(paramsPtr)));
4878 	}
4879 
4880 	{
4881 		ParamsPtr paramsPtr (new MiscTestParams(
4882 			/*taskCount*/	tcu::Nothing,
4883 			/*meshCount*/	1u,
4884 			/*width*/		16u,
4885 			/*height*/		16u));
4886 
4887 		miscTests->addChild(new MaxPointsCase(testCtx, "max_points", "Draw the maximum number of points", std::move(paramsPtr)));
4888 	}
4889 
4890 	{
4891 		ParamsPtr paramsPtr (new MiscTestParams(
4892 			/*taskCount*/	tcu::Nothing,
4893 			/*meshCount*/	1u,
4894 			/*width*/		1u,
4895 			/*height*/		1020u));
4896 
4897 		miscTests->addChild(new MaxLinesCase(testCtx, "max_lines", "Draw the maximum number of lines", std::move(paramsPtr)));
4898 	}
4899 
4900 	{
4901 		ParamsPtr paramsPtr (new MiscTestParams(
4902 			/*taskCount*/	tcu::Nothing,
4903 			/*meshCount*/	1u,
4904 			/*width*/		512u,
4905 			/*height*/		512u));
4906 
4907 		miscTests->addChild(new MaxTrianglesCase(testCtx, "max_triangles", "Draw the maximum number of triangles", std::move(paramsPtr)));
4908 	}
4909 
4910 	{
4911 		ParamsPtr paramsPtr (new MiscTestParams(
4912 			/*taskCount*/	tcu::just(65535u),
4913 			/*meshCount*/	1u,
4914 			/*width*/		1360u,
4915 			/*height*/		1542u));
4916 
4917 		miscTests->addChild(new LargeWorkGroupCase(testCtx, "many_task_work_groups", "Generate a large number of task work groups", std::move(paramsPtr)));
4918 	}
4919 
4920 	{
4921 		ParamsPtr paramsPtr (new MiscTestParams(
4922 			/*taskCount*/	tcu::Nothing,
4923 			/*meshCount*/	65535u,
4924 			/*width*/		1360u,
4925 			/*height*/		1542u));
4926 
4927 		miscTests->addChild(new LargeWorkGroupCase(testCtx, "many_mesh_work_groups", "Generate a large number of mesh work groups", std::move(paramsPtr)));
4928 	}
4929 
4930 	{
4931 		ParamsPtr paramsPtr (new MiscTestParams(
4932 			/*taskCount*/	tcu::just(512u),
4933 			/*meshCount*/	512u,
4934 			/*width*/		4096u,
4935 			/*height*/		2048u));
4936 
4937 		miscTests->addChild(new LargeWorkGroupCase(testCtx, "many_task_mesh_work_groups", "Generate a large number of task and mesh work groups", std::move(paramsPtr)));
4938 	}
4939 
4940 	{
4941 		const PrimitiveType types[] = {
4942 			PrimitiveType::POINTS,
4943 			PrimitiveType::LINES,
4944 			PrimitiveType::TRIANGLES,
4945 		};
4946 
4947 		for (int i = 0; i < 2; ++i)
4948 		{
4949 			const bool extraWrites = (i > 0);
4950 
4951 			for (const auto primType : types)
4952 			{
4953 				std::unique_ptr<NoPrimitivesParams> params (new NoPrimitivesParams(
4954 				/*taskCount*/		(extraWrites ? tcu::just(1u) : tcu::Nothing),
4955 				/*meshCount*/		1u,
4956 				/*width*/			16u,
4957 				/*height*/			16u,
4958 				/*primitiveType*/	primType));
4959 
4960 				ParamsPtr			paramsPtr	(params.release());
4961 				const auto			primName	= primitiveTypeName(primType);
4962 				const std::string	name		= "no_" + primName + (extraWrites ? "_extra_writes" : "");
4963 				const std::string	desc		= "Run a pipeline that generates no " + primName + (extraWrites ? " but generates primitive data" : "");
4964 
4965 				miscTests->addChild(extraWrites
4966 					? (new NoPrimitivesExtraWritesCase(testCtx, name, desc, std::move(paramsPtr)))
4967 					: (new NoPrimitivesCase(testCtx, name, desc, std::move(paramsPtr))));
4968 			}
4969 		}
4970 	}
4971 
4972 	{
4973 		for (int i = 0; i < 2; ++i)
4974 		{
4975 			const bool useTaskShader = (i == 0);
4976 
4977 			ParamsPtr paramsPtr (new MiscTestParams(
4978 				/*taskCount*/	(useTaskShader ? tcu::just(1u) : tcu::Nothing),
4979 				/*meshCount*/	1u,
4980 				/*width*/		1u,
4981 				/*height*/		1u));
4982 
4983 			const std::string shader	= (useTaskShader ? "task" : "mesh");
4984 			const std::string name		= "barrier_in_" + shader;
4985 			const std::string desc		= "Use a control barrier in the " + shader + " shader";
4986 
4987 			miscTests->addChild(new SimpleBarrierCase(testCtx, name, desc, std::move(paramsPtr)));
4988 		}
4989 	}
4990 
4991 	{
4992 		const struct
4993 		{
4994 			MemoryBarrierType	memBarrierType;
4995 			std::string			caseName;
4996 		} barrierTypes[] =
4997 		{
4998 			{ MemoryBarrierType::SHARED,	"memory_barrier_shared"	},
4999 			{ MemoryBarrierType::GROUP,		"group_memory_barrier"	},
5000 		};
5001 
5002 		for (const auto& barrierCase : barrierTypes)
5003 		{
5004 			for (int i = 0; i < 2; ++i)
5005 			{
5006 				const bool useTaskShader = (i == 0);
5007 
5008 				std::unique_ptr<MemoryBarrierParams> paramsPtr (new MemoryBarrierParams(
5009 					/*taskCount*/		(useTaskShader ? tcu::just(1u) : tcu::Nothing),
5010 					/*meshCount*/		1u,
5011 					/*width*/			1u,
5012 					/*height*/			1u,
5013 					/*memBarrierType*/	barrierCase.memBarrierType));
5014 
5015 				const std::string shader	= (useTaskShader ? "task" : "mesh");
5016 				const std::string name		= barrierCase.caseName + "_in_" + shader;
5017 				const std::string desc		= "Use " + paramsPtr->glslFunc() + "() in the " + shader + " shader";
5018 
5019 				miscTests->addChild(new MemoryBarrierCase(testCtx, name, desc, std::move(paramsPtr)));
5020 			}
5021 		}
5022 	}
5023 
5024 	{
5025 		for (int i = 0; i < 2; ++i)
5026 		{
5027 			const bool useTaskShader	= (i > 0);
5028 			const auto name				= std::string("custom_attributes") + (useTaskShader ? "_and_task_shader" : "");
5029 			const auto desc				= std::string("Use several custom vertex and primitive attributes") + (useTaskShader ? " and also a task shader" : "");
5030 
5031 			ParamsPtr paramsPtr (new MiscTestParams(
5032 				/*taskCount*/	(useTaskShader ? tcu::just(1u) : tcu::Nothing),
5033 				/*meshCount*/	1u,
5034 				/*width*/		32u,
5035 				/*height*/		32u));
5036 
5037 			miscTests->addChild(new CustomAttributesCase(testCtx, name, desc, std::move(paramsPtr)));
5038 		}
5039 	}
5040 
5041 	{
5042 		for (int i = 0; i < 2; ++i)
5043 		{
5044 			const bool useTaskShader	= (i > 0);
5045 			const auto name				= std::string("push_constant") + (useTaskShader ? "_and_task_shader" : "");
5046 			const auto desc				= std::string("Use push constants in the mesh shader stage") + (useTaskShader ? " and also in the task shader stage" : "");
5047 
5048 			ParamsPtr paramsPtr (new MiscTestParams(
5049 				/*taskCount*/	(useTaskShader ? tcu::just(1u) : tcu::Nothing),
5050 				/*meshCount*/	1u,
5051 				/*width*/		16u,
5052 				/*height*/		16u));
5053 
5054 			miscTests->addChild(new PushConstantCase(testCtx, name, desc, std::move(paramsPtr)));
5055 		}
5056 	}
5057 
5058 	{
5059 		ParamsPtr paramsPtr (new MaximizeThreadsParams(
5060 			/*taskCount*/		tcu::Nothing,
5061 			/*meshCount*/		1u,
5062 			/*width*/			128u,
5063 			/*height*/			1u,
5064 			/*localSize*/		32u,
5065 			/*numVertices*/		128u,
5066 			/*numPrimitives*/	256u));
5067 
5068 		miscTests->addChild(new MaximizePrimitivesCase(testCtx, "maximize_primitives", "Use a large number of primitives compared to other sizes", std::move(paramsPtr)));
5069 	}
5070 
5071 	{
5072 		ParamsPtr paramsPtr (new MaximizeThreadsParams(
5073 			/*taskCount*/		tcu::Nothing,
5074 			/*meshCount*/		1u,
5075 			/*width*/			64u,
5076 			/*height*/			1u,
5077 			/*localSize*/		32u,
5078 			/*numVertices*/		256u,
5079 			/*numPrimitives*/	128u));
5080 
5081 		miscTests->addChild(new MaximizeVerticesCase(testCtx, "maximize_vertices", "Use a large number of vertices compared to other sizes", std::move(paramsPtr)));
5082 	}
5083 
5084 	{
5085 		const uint32_t kInvocationCases[] = { 32u, 64u, 128u, 256u };
5086 
5087 		for (const auto& invocationCase : kInvocationCases)
5088 		{
5089 			const auto invsStr		= std::to_string(invocationCase);
5090 			const auto numPixels	= invocationCase / 2u;
5091 
5092 			ParamsPtr paramsPtr (new MaximizeThreadsParams(
5093 				/*taskCount*/		tcu::Nothing,
5094 				/*meshCount*/		1u,
5095 				/*width*/			numPixels,
5096 				/*height*/			1u,
5097 				/*localSize*/		invocationCase,
5098 				/*numVertices*/		numPixels,
5099 				/*numPrimitives*/	numPixels));
5100 
5101 			miscTests->addChild(new MaximizeInvocationsCase(testCtx, "maximize_invocations_" + invsStr, "Use a large number of invocations compared to other sizes: " + invsStr, std::move(paramsPtr)));
5102 		}
5103 	}
5104 
5105 	if (false) // This test does not work and the spec is not clear that it should.
5106 	{
5107 		ParamsPtr paramsPtr (new MiscTestParams(
5108 			/*taskCount*/	tcu::just(1u),
5109 			/*meshCount*/	128u,
5110 			/*width*/		32u,
5111 			/*height*/		128u));
5112 
5113 		miscTests->addChild(new CountReadCase(testCtx, "count_reads", "Attempt to read gl_TaskCountNV and gl_PrimitiveCountNV from multiple invocations", std::move(paramsPtr)));
5114 	}
5115 
5116 
5117 	addFunctionCaseWithPrograms(miscTests.get(), "mixed_pipelines", "Mix classic and mesh shader pipelines in the same render pass", checkMeshSupport, initMixedPipelinesPrograms, testMixedPipelines);
5118 
5119 	return miscTests.release();
5120 }
5121 
createMeshShaderInOutTests(tcu::TestContext & testCtx)5122 tcu::TestCaseGroup* createMeshShaderInOutTests (tcu::TestContext& testCtx)
5123 {
5124 	GroupPtr inOutTests (new tcu::TestCaseGroup(testCtx, "in_out", "Mesh Shader Tests checking Input/Output interfaces"));
5125 
5126 	const struct
5127 	{
5128 		bool i64; bool f64; bool i16; bool f16;
5129 		const char* name;
5130 	} requiredFeatures[] =
5131 	{
5132 		// Restrict the number of combinations to avoid creating too many tests.
5133 		//	i64		f64		i16		f16		name
5134 		{	false,	false,	false,	false,	"32_bits_only"		},
5135 		{	true,	false,	false,	false,	"with_i64"			},
5136 		{	false,	true,	false,	false,	"with_f64"			},
5137 		{	true,	true,	false,	false,	"all_but_16_bits"	},
5138 		{	false,	false,	true,	false,	"with_i16"			},
5139 		{	false,	false,	false,	true,	"with_f16"			},
5140 		{	true,	true,	true,	true,	"all_types"			},
5141 	};
5142 
5143 	Owner			ownerCases[]			= { Owner::VERTEX, Owner::PRIMITIVE };
5144 	DataType		dataTypeCases[]			= { DataType::FLOAT, DataType::INTEGER };
5145 	BitWidth		bitWidthCases[]			= { BitWidth::B64, BitWidth::B32, BitWidth::B16 };
5146 	DataDim			dataDimCases[]			= { DataDim::SCALAR, DataDim::VEC2, DataDim::VEC3, DataDim::VEC4 };
5147 	Interpolation	interpolationCases[]	= { Interpolation::NORMAL, Interpolation::FLAT };
5148 	de::Random		rnd(1636723398u);
5149 
5150 	for (const auto& reqs : requiredFeatures)
5151 	{
5152 		GroupPtr reqsGroup (new tcu::TestCaseGroup(testCtx, reqs.name, ""));
5153 
5154 		// Generate the variable list according to the group requirements.
5155 		IfaceVarVecPtr varsPtr(new IfaceVarVec);
5156 
5157 		for (const auto& ownerCase : ownerCases)
5158 		for (const auto& dataTypeCase : dataTypeCases)
5159 		for (const auto& bitWidthCase : bitWidthCases)
5160 		for (const auto& dataDimCase : dataDimCases)
5161 		for (const auto& interpolationCase : interpolationCases)
5162 		{
5163 			if (dataTypeCase == DataType::FLOAT)
5164 			{
5165 				if (bitWidthCase == BitWidth::B64 && !reqs.f64)
5166 					continue;
5167 				if (bitWidthCase == BitWidth::B16 && !reqs.f16)
5168 					continue;
5169 			}
5170 			else if (dataTypeCase == DataType::INTEGER)
5171 			{
5172 				if (bitWidthCase == BitWidth::B64 && !reqs.i64)
5173 					continue;
5174 				if (bitWidthCase == BitWidth::B16 && !reqs.i16)
5175 					continue;
5176 			}
5177 
5178 			if (dataTypeCase == DataType::INTEGER && interpolationCase == Interpolation::NORMAL)
5179 				continue;
5180 
5181 			if (ownerCase == Owner::PRIMITIVE && interpolationCase == Interpolation::NORMAL)
5182 				continue;
5183 
5184 			if (dataTypeCase == DataType::FLOAT && bitWidthCase == BitWidth::B64 && interpolationCase == Interpolation::NORMAL)
5185 				continue;
5186 
5187 			for (uint32_t idx = 0u; idx < IfaceVar::kVarsPerType; ++idx)
5188 				varsPtr->push_back(IfaceVar(ownerCase, dataTypeCase, bitWidthCase, dataDimCase, interpolationCase, idx));
5189 		}
5190 
5191 		// Generating all permutations of the variables above would mean millions of tests, so we just generate some pseudorandom permutations.
5192 		constexpr uint32_t kPermutations = 40u;
5193 		for (uint32_t combIdx = 0; combIdx < kPermutations; ++combIdx)
5194 		{
5195 			const auto caseName = "permutation_" + std::to_string(combIdx);
5196 			GroupPtr rndGroup(new tcu::TestCaseGroup(testCtx, caseName.c_str(), ""));
5197 
5198 			// Duplicate and shuffle vector.
5199 			IfaceVarVecPtr permutVec (new IfaceVarVec(*varsPtr));
5200 			rnd.shuffle(begin(*permutVec), end(*permutVec));
5201 
5202 			// Cut the vector short to the usable number of locations.
5203 			{
5204 				uint32_t	usedLocations	= 0u;
5205 				size_t		vectorEnd		= 0u;
5206 				auto&		varVec			= *permutVec;
5207 
5208 				for (size_t i = 0; i < varVec.size(); ++i)
5209 				{
5210 					vectorEnd = i;
5211 					const auto varSize = varVec[i].getLocationSize();
5212 					if (usedLocations + varSize > InterfaceVariablesCase::kMaxLocations)
5213 						break;
5214 					usedLocations += varSize;
5215 				}
5216 
5217 				varVec.resize(vectorEnd);
5218 			}
5219 
5220 			for (int i = 0; i < 2; ++i)
5221 			{
5222 				const bool useTaskShader	= (i > 0);
5223 				const auto name				= (useTaskShader ? "task_mesh" : "mesh_only");
5224 
5225 				// Duplicate vector for this particular case so both variants have the same shuffle.
5226 				IfaceVarVecPtr paramsVec(new IfaceVarVec(*permutVec));
5227 
5228 				ParamsPtr paramsPtr (new InterfaceVariableParams(
5229 					/*taskCount*/	(useTaskShader ? tcu::just(1u) : tcu::Nothing),
5230 					/*meshCount*/	1u,
5231 					/*width*/		8u,
5232 					/*height*/		8u,
5233 					/*useInt64*/	reqs.i64,
5234 					/*useFloat64*/	reqs.f64,
5235 					/*useInt16*/	reqs.i16,
5236 					/*useFloat16*/	reqs.f16,
5237 					/*vars*/		std::move(paramsVec)));
5238 
5239 				rndGroup->addChild(new InterfaceVariablesCase(testCtx, name, "", std::move(paramsPtr)));
5240 			}
5241 
5242 			reqsGroup->addChild(rndGroup.release());
5243 		}
5244 
5245 		inOutTests->addChild(reqsGroup.release());
5246 	}
5247 
5248 	return inOutTests.release();
5249 }
5250 
5251 } // MeshShader
5252 } // vkt
5253