• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2021 The Khronos Group Inc.
6  * Copyright (c) 2021 Valve Corporation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Mesh Shader Synchronization Tests
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktMeshShaderSyncTests.hpp"
26 #include "vktMeshShaderUtil.hpp"
27 #include "vktTestCase.hpp"
28 
29 #include "vkDefs.hpp"
30 #include "vkTypeUtil.hpp"
31 #include "vkImageWithMemory.hpp"
32 #include "vkBufferWithMemory.hpp"
33 #include "vkObjUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkBarrierUtil.hpp"
37 #include "vkImageUtil.hpp"
38 
39 #include "deUniquePtr.hpp"
40 
41 #include <iostream>
42 #include <sstream>
43 #include <vector>
44 
45 namespace vkt
46 {
47 namespace MeshShader
48 {
49 
50 namespace
51 {
52 
53 using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
54 
55 using namespace vk;
56 
// Stages taking part in these tests, either as the writer or as the reader of the resource.
enum class Stage
{
	HOST		= 0,	// Maps to VK_PIPELINE_STAGE_HOST_BIT.
	TRANSFER,			// Maps to VK_PIPELINE_STAGE_TRANSFER_BIT.
	TASK,				// Task shader stage.
	MESH,				// Mesh shader stage.
	FRAG,				// Fragment shader stage.
};
66 
operator <<(std::ostream & stream,Stage stage)67 std::ostream& operator<< (std::ostream& stream, Stage stage)
68 {
69 	switch (stage)
70 	{
71 	case Stage::HOST:		stream << "host";		break;
72 	case Stage::TRANSFER:	stream << "transfer";	break;
73 	case Stage::TASK:		stream << "task";		break;
74 	case Stage::MESH:		stream << "mesh";		break;
75 	case Stage::FRAG:		stream << "frag";		break;
76 	default: DE_ASSERT(false); break;
77 	}
78 
79 	return stream;
80 }
81 
isShaderStage(Stage stage)82 bool isShaderStage (Stage stage)
83 {
84 	return (stage == Stage::TASK || stage == Stage::MESH || stage == Stage::FRAG);
85 }
86 
stageToFlags(Stage stage)87 VkPipelineStageFlags stageToFlags (Stage stage)
88 {
89 	switch (stage)
90 	{
91 	case Stage::HOST:		return VK_PIPELINE_STAGE_HOST_BIT;
92 	case Stage::TRANSFER:	return VK_PIPELINE_STAGE_TRANSFER_BIT;
93 	case Stage::TASK:		return VK_PIPELINE_STAGE_TASK_SHADER_BIT_NV;
94 	case Stage::MESH:		return VK_PIPELINE_STAGE_MESH_SHADER_BIT_NV;
95 	case Stage::FRAG:		return VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
96 	default:				DE_ASSERT(false); break;
97 	}
98 
99 	// Unreachable.
100 	DE_ASSERT(false);
101 	return 0u;
102 }
103 
getImageFormat()104 VkFormat getImageFormat ()
105 {
106 	return VK_FORMAT_R32_UINT;
107 }
108 
getImageExtent()109 VkExtent3D getImageExtent ()
110 {
111 	return makeExtent3D(1u, 1u, 1u);
112 }
113 
// Kinds of resources that can be written to and read from in these tests.
enum class ResourceType
{
	UNIFORM_BUFFER	= 0,	// Bound as VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER.
	STORAGE_BUFFER,			// Bound as VK_DESCRIPTOR_TYPE_STORAGE_BUFFER.
	STORAGE_IMAGE,			// Bound as VK_DESCRIPTOR_TYPE_STORAGE_IMAGE.
	SAMPLED_IMAGE,			// Bound as VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER.
};
122 
resourceTypeToDescriptor(ResourceType resType)123 VkDescriptorType resourceTypeToDescriptor (ResourceType resType)
124 {
125 	switch (resType)
126 	{
127 	case ResourceType::UNIFORM_BUFFER:	return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
128 	case ResourceType::STORAGE_BUFFER:	return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
129 	case ResourceType::STORAGE_IMAGE:	return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
130 	case ResourceType::SAMPLED_IMAGE:	return VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
131 	default:							DE_ASSERT(false); break;
132 	}
133 
134 	// Unreachable.
135 	DE_ASSERT(false);
136 	return VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR;
137 }
138 
// Selects whether the test uses a general memory barrier or a specific barrier.
enum class BarrierType
{
	GENERAL		= 0,	// General memory barrier.
	SPECIFIC,			// Specific barrier.
};
145 
// Kinds of write accesses used by the writing stage.
enum class WriteAccess
{
	HOST_WRITE		= 0,	// Maps to VK_ACCESS_HOST_WRITE_BIT.
	TRANSFER_WRITE,			// Maps to VK_ACCESS_TRANSFER_WRITE_BIT.
	SHADER_WRITE,			// Maps to VK_ACCESS_SHADER_WRITE_BIT.
};
153 
writeAccessToFlags(WriteAccess access)154 VkAccessFlags writeAccessToFlags (WriteAccess access)
155 {
156 	switch (access)
157 	{
158 	case WriteAccess::HOST_WRITE:		return VK_ACCESS_HOST_WRITE_BIT;
159 	case WriteAccess::TRANSFER_WRITE:	return VK_ACCESS_TRANSFER_WRITE_BIT;
160 	case WriteAccess::SHADER_WRITE:		return VK_ACCESS_SHADER_WRITE_BIT;
161 	default:							DE_ASSERT(false); break;
162 	}
163 
164 	// Unreachable.
165 	DE_ASSERT(false);
166 	return 0u;
167 }
168 
// Kinds of read accesses used by the reading stage.
enum class ReadAccess
{
	HOST_READ		= 0,	// Maps to VK_ACCESS_HOST_READ_BIT.
	TRANSFER_READ,			// Maps to VK_ACCESS_TRANSFER_READ_BIT.
	SHADER_READ,			// Maps to VK_ACCESS_SHADER_READ_BIT.
	UNIFORM_READ,			// Maps to VK_ACCESS_UNIFORM_READ_BIT.
};
177 
readAccessToFlags(ReadAccess access)178 VkAccessFlags readAccessToFlags (ReadAccess access)
179 {
180 	switch (access)
181 	{
182 	case ReadAccess::HOST_READ:			return VK_ACCESS_HOST_READ_BIT;
183 	case ReadAccess::TRANSFER_READ:		return VK_ACCESS_TRANSFER_READ_BIT;
184 	case ReadAccess::SHADER_READ:		return VK_ACCESS_SHADER_READ_BIT;
185 	case ReadAccess::UNIFORM_READ:		return VK_ACCESS_UNIFORM_READ_BIT;
186 	default:							DE_ASSERT(false); break;
187 	}
188 
189 	// Unreachable.
190 	DE_ASSERT(false);
191 	return 0u;
192 }
193 
194 // Auxiliary functions to verify certain combinations are possible.
195 
196 // Check if the writing stage can use the specified write access.
canWriteFromStageAsAccess(Stage writeStage,WriteAccess access)197 bool canWriteFromStageAsAccess (Stage writeStage, WriteAccess access)
198 {
199 	switch (writeStage)
200 	{
201 	case Stage::HOST:		return (access == WriteAccess::HOST_WRITE);
202 	case Stage::TRANSFER:	return (access == WriteAccess::TRANSFER_WRITE);
203 	case Stage::TASK:		// fallthrough
204 	case Stage::MESH:		// fallthrough
205 	case Stage::FRAG:		return (access == WriteAccess::SHADER_WRITE);
206 	default:				DE_ASSERT(false); break;
207 	}
208 
209 	return false;
210 }
211 
212 // Check if the reading stage can use the specified read access.
canReadFromStageAsAccess(Stage readStage,ReadAccess access)213 bool canReadFromStageAsAccess (Stage readStage, ReadAccess access)
214 {
215 	switch (readStage)
216 	{
217 	case Stage::HOST:		return (access == ReadAccess::HOST_READ);
218 	case Stage::TRANSFER:	return (access == ReadAccess::TRANSFER_READ);
219 	case Stage::TASK:		// fallthrough
220 	case Stage::MESH:		// fallthrough
221 	case Stage::FRAG:		return (access == ReadAccess::SHADER_READ || access == ReadAccess::UNIFORM_READ);
222 	default:				DE_ASSERT(false); break;
223 	}
224 
225 	return false;
226 }
227 
228 // Check if reading the given resource type is possible with the given type of read access.
canReadResourceAsAccess(ResourceType resType,ReadAccess access)229 bool canReadResourceAsAccess (ResourceType resType, ReadAccess access)
230 {
231 	if (access == ReadAccess::UNIFORM_READ)
232 		return (resType == ResourceType::UNIFORM_BUFFER);
233 	return true;
234 }
235 
236 // Check if writing to the given resource type is possible with the given type of write access.
canWriteResourceAsAccess(ResourceType resType,WriteAccess access)237 bool canWriteResourceAsAccess (ResourceType resType, WriteAccess access)
238 {
239 	if (resType == ResourceType::UNIFORM_BUFFER)
240 		return (access != WriteAccess::SHADER_WRITE);
241 	return true;
242 }
243 
244 // Check if the given stage can write to the given resource type.
canWriteTo(Stage stage,ResourceType resType)245 bool canWriteTo (Stage stage, ResourceType resType)
246 {
247 	switch (stage)
248 	{
249 	case Stage::HOST:		return (resType == ResourceType::UNIFORM_BUFFER || resType == ResourceType::STORAGE_BUFFER);
250 	case Stage::TRANSFER:	return true;
251 	case Stage::TASK:		// fallthrough
252 	case Stage::MESH:		return (resType == ResourceType::STORAGE_BUFFER || resType == ResourceType::STORAGE_IMAGE);
253 	default:				DE_ASSERT(false); break;
254 	}
255 
256 	return false;
257 }
258 
259 // Check if the given stage can read from the given resource type.
canReadFrom(Stage stage,ResourceType resType)260 bool canReadFrom (Stage stage, ResourceType resType)
261 {
262 	switch (stage)
263 	{
264 	case Stage::HOST:		return (resType == ResourceType::UNIFORM_BUFFER || resType == ResourceType::STORAGE_BUFFER);
265 	case Stage::TRANSFER:	// fallthrough
266 	case Stage::TASK:		// fallthrough
267 	case Stage::MESH:
268 	case Stage::FRAG:		return true;
269 	default:				DE_ASSERT(false); break;
270 	}
271 
272 	return false;
273 }
274 
275 // Will we need to store the test value in an auxiliar buffer to be read?
needsAuxiliarSourceBuffer(Stage fromStage,Stage toStage)276 bool needsAuxiliarSourceBuffer (Stage fromStage, Stage toStage)
277 {
278 	DE_UNREF(toStage);
279 	return (fromStage == Stage::TRANSFER);
280 }
281 
282 // Will we need to store the read operation result into an auxiliar buffer to be checked?
needsAuxiliarDestBuffer(Stage fromStage,Stage toStage)283 bool needsAuxiliarDestBuffer (Stage fromStage, Stage toStage)
284 {
285 	DE_UNREF(fromStage);
286 	return (toStage == Stage::TRANSFER);
287 }
288 
289 // Needs any auxiliar buffer for any case?
needsAuxiliarBuffer(Stage fromStage,Stage toStage)290 bool needsAuxiliarBuffer (Stage fromStage, Stage toStage)
291 {
292 	return (needsAuxiliarSourceBuffer(fromStage, toStage) || needsAuxiliarDestBuffer(fromStage, toStage));
293 }
294 
295 // Will the final value be stored in the auxiliar destination buffer?
valueInAuxiliarDestBuffer(Stage toStage)296 bool valueInAuxiliarDestBuffer (Stage toStage)
297 {
298 	return (toStage == Stage::TRANSFER);
299 }
300 
301 // Will the final value be stored in the resource buffer itself?
valueInResourceBuffer(Stage toStage)302 bool valueInResourceBuffer (Stage toStage)
303 {
304 	return (toStage == Stage::HOST);
305 }
306 
307 // Will the final value be stored in the color buffer?
valueInColorBuffer(Stage toStage)308 bool valueInColorBuffer (Stage toStage)
309 {
310 	return (!valueInAuxiliarDestBuffer(toStage) && !valueInResourceBuffer(toStage));
311 }
312 
313 // Image usage flags for the image resource.
resourceImageUsageFlags(ResourceType resourceType)314 VkImageUsageFlags resourceImageUsageFlags (ResourceType resourceType)
315 {
316 	VkImageUsageFlags flags = (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
317 
318 	switch (resourceType)
319 	{
320 	case ResourceType::STORAGE_IMAGE:	flags |= VK_IMAGE_USAGE_STORAGE_BIT;	break;
321 	case ResourceType::SAMPLED_IMAGE:	flags |= VK_IMAGE_USAGE_SAMPLED_BIT;	break;
322 	default: DE_ASSERT(false); break;
323 	}
324 
325 	return flags;
326 }
327 
328 // Buffer usage flags for the buffer resource.
resourceBufferUsageFlags(ResourceType resourceType)329 VkBufferUsageFlags resourceBufferUsageFlags (ResourceType resourceType)
330 {
331 	VkBufferUsageFlags flags = (VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
332 
333 	switch (resourceType)
334 	{
335 	case ResourceType::UNIFORM_BUFFER:	flags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;	break;
336 	case ResourceType::STORAGE_BUFFER:	flags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;	break;
337 	default: DE_ASSERT(false); break;
338 	}
339 
340 	return flags;
341 }
342 
343 // Is the resource written to and read from a shader stage?
readAndWriteFromShaders(Stage fromStage,Stage toStage)344 bool readAndWriteFromShaders (Stage fromStage, Stage toStage)
345 {
346 	return (isShaderStage(fromStage) && isShaderStage(toStage));
347 }
348 
349 struct TestParams
350 {
351 	Stage			fromStage;
352 	Stage			toStage;
353 	ResourceType	resourceType;
354 	BarrierType		barrierType;
355 	WriteAccess		writeAccess;
356 	ReadAccess		readAccess;
357 	uint32_t		testValue;
358 
359 protected:
readsOrWritesInvkt::MeshShader::__anon43782a3a0111::TestParams360 	bool readsOrWritesIn (Stage stage) const
361 	{
362 		DE_ASSERT(fromStage != toStage);
363 		return (fromStage == stage || toStage == stage);
364 	}
365 
366 public:
needsTaskvkt::MeshShader::__anon43782a3a0111::TestParams367 	bool needsTask () const
368 	{
369 		return readsOrWritesIn(Stage::TASK);
370 	}
371 
readsOrWritesInMeshvkt::MeshShader::__anon43782a3a0111::TestParams372 	bool readsOrWritesInMesh () const
373 	{
374 		return readsOrWritesIn(Stage::MESH);
375 	}
376 
getResourceDeclvkt::MeshShader::__anon43782a3a0111::TestParams377 	std::string getResourceDecl () const
378 	{
379 		const auto			imgFormat		= ((resourceType == ResourceType::STORAGE_IMAGE) ? ", r32ui" : "");
380 		const auto			storagePrefix	= ((writeAccess == WriteAccess::SHADER_WRITE) ? "" : "readonly ");
381 		std::ostringstream	decl;
382 
383 		decl << "layout (set=0, binding=0" << imgFormat << ") ";
384 		switch (resourceType)
385 		{
386 		case ResourceType::UNIFORM_BUFFER:	decl << "uniform UniformBuffer { uint value; } ub;";					break;
387 		case ResourceType::STORAGE_BUFFER:	decl << storagePrefix << "buffer StorageBuffer { uint value; } sb;";	break;
388 		case ResourceType::STORAGE_IMAGE:	decl << storagePrefix << "uniform uimage2D si;";						break;
389 		case ResourceType::SAMPLED_IMAGE:	decl << "uniform usampler2D sampled;";									break;
390 		default:							DE_ASSERT(false);														break;
391 		}
392 
393 		decl << "\n";
394 		return decl.str();
395 	}
396 
397 	struct PushConstantStruct
398 	{
399 		uint32_t writeVal;
400 		uint32_t readVal;
401 	};
402 
403 	// Get declaration for the "pc" push constant block. Must match the structure above.
getPushConstantDeclvkt::MeshShader::__anon43782a3a0111::TestParams404 	std::string getPushConstantDecl () const
405 	{
406 		std::ostringstream pc;
407 		pc
408 			<< "layout (push_constant, std430) uniform PushConstantBlock {\n"
409 			<< "    uint writeVal;\n"
410 			<< "    uint readVal;\n"
411 			<< "} pc;\n"
412 			;
413 		return pc.str();
414 	}
415 
getReadStatementvkt::MeshShader::__anon43782a3a0111::TestParams416 	std::string getReadStatement (const std::string& outName) const
417 	{
418 		std::ostringstream statement;
419 		statement << "    if (pc.readVal > 0u) { " << outName << " = ";
420 
421 		switch (resourceType)
422 		{
423 		case ResourceType::UNIFORM_BUFFER:	statement << "ub.value";							break;
424 		case ResourceType::STORAGE_BUFFER:	statement << "sb.value";							break;
425 		case ResourceType::STORAGE_IMAGE:	statement << "imageLoad(si, ivec2(0, 0)).x";		break;
426 		case ResourceType::SAMPLED_IMAGE:	statement << "texture(sampled, vec2(0.5, 0.5)).x";	break;
427 		default:							DE_ASSERT(false); break;
428 		}
429 
430 		statement << "; }\n";
431 		return statement.str();
432 	}
433 
getWriteStatementvkt::MeshShader::__anon43782a3a0111::TestParams434 	std::string getWriteStatement (const std::string& valueName) const
435 	{
436 		std::ostringstream statement;
437 		statement << "    if (pc.writeVal > 0u) { ";
438 
439 		switch (resourceType)
440 		{
441 		case ResourceType::STORAGE_BUFFER:	statement << "sb.value = " << valueName;											break;
442 		case ResourceType::STORAGE_IMAGE:	statement << "imageStore(si, ivec2(0, 0), uvec4(" << valueName << ", 0, 0, 0))";	break;
443 		case ResourceType::UNIFORM_BUFFER:	// fallthrough
444 		case ResourceType::SAMPLED_IMAGE:	// fallthrough
445 		default:							DE_ASSERT(false); break;
446 		}
447 
448 		statement << "; }\n";
449 		return statement.str();
450 	}
451 
getResourceShaderStagesvkt::MeshShader::__anon43782a3a0111::TestParams452 	VkShaderStageFlags getResourceShaderStages () const
453 	{
454 		VkShaderStageFlags flags = 0u;
455 
456 		if (fromStage == Stage::TASK || toStage == Stage::TASK)	flags |= VK_SHADER_STAGE_TASK_BIT_NV;
457 		if (fromStage == Stage::MESH || toStage == Stage::MESH)	flags |= VK_SHADER_STAGE_MESH_BIT_NV;
458 		if (fromStage == Stage::FRAG || toStage == Stage::FRAG)	flags |= VK_SHADER_STAGE_FRAGMENT_BIT;
459 
460 		// We assume at least something must be done either on the task or mesh shaders for the tests to be interesting.
461 		DE_ASSERT((flags & (VK_SHADER_STAGE_TASK_BIT_NV | VK_SHADER_STAGE_MESH_BIT_NV)) != 0u);
462 		return flags;
463 	}
464 
465 	// We'll prefer to keep the image in the general layout if it will be written to from a shader stage or if the barrier is going to be a generic memory barrier.
preferGeneralLayoutvkt::MeshShader::__anon43782a3a0111::TestParams466 	bool preferGeneralLayout () const
467 	{
468 		return (isShaderStage(fromStage) || (barrierType == BarrierType::GENERAL) || (resourceType == ResourceType::STORAGE_IMAGE));
469 	}
470 
471 	// A subpass dependency is needed if both the source and destination stages are shader stages.
needsSubpassDependencyvkt::MeshShader::__anon43782a3a0111::TestParams472 	bool needsSubpassDependency () const
473 	{
474 		return readAndWriteFromShaders(fromStage, toStage);
475 	}
476 };
477 
478 class MeshShaderSyncCase : public vkt::TestCase
479 {
480 public:
MeshShaderSyncCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const TestParams & params)481 					MeshShaderSyncCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, const TestParams& params)
482 						: vkt::TestCase (testCtx, name, description), m_params (params)
483 						{}
484 
~MeshShaderSyncCase(void)485 	virtual			~MeshShaderSyncCase		(void) {}
486 
487 	void			checkSupport			(Context& context) const override;
488 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
489 	TestInstance*	createInstance			(Context& context) const override;
490 
491 protected:
492 	TestParams		m_params;
493 };
494 
495 class MeshShaderSyncInstance : public vkt::TestInstance
496 {
497 public:
MeshShaderSyncInstance(Context & context,const TestParams & params)498 						MeshShaderSyncInstance	(Context& context, const TestParams& params) : vkt::TestInstance(context), m_params(params) {}
~MeshShaderSyncInstance(void)499 	virtual				~MeshShaderSyncInstance	(void) {}
500 
501 	tcu::TestStatus		iterate					(void) override;
502 
503 protected:
504 	TestParams			m_params;
505 };
506 
checkSupport(Context & context) const507 void MeshShaderSyncCase::checkSupport (Context& context) const
508 {
509 	checkTaskMeshShaderSupportNV(context, m_params.needsTask(), true);
510 
511 	if (m_params.writeAccess == WriteAccess::SHADER_WRITE)
512 	{
513 		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
514 	}
515 }
516 
initPrograms(vk::SourceCollections & programCollection) const517 void MeshShaderSyncCase::initPrograms (vk::SourceCollections& programCollection) const
518 {
519 	const bool	needsTaskShader	= m_params.needsTask();
520 	const auto	valueStr		= de::toString(m_params.testValue);
521 	const auto	resourceDecl	= m_params.getResourceDecl();
522 	const auto	pcDecl			= m_params.getPushConstantDecl();
523 
524 	if (needsTaskShader)
525 	{
526 
527 		std::ostringstream task;
528 		task
529 			<< "#version 450\n"
530 			<< "#extension GL_NV_mesh_shader : enable\n"
531 			<< "\n"
532 			<< "layout(local_size_x=1) in;\n"
533 			<< "\n"
534 			<< "out taskNV TaskData { uint value; } td;\n"
535 			<< "\n"
536 			<< resourceDecl
537 			<< pcDecl
538 			<< "\n"
539 			<< "void main ()\n"
540 			<< "{\n"
541 			<< "    gl_TaskCountNV = 1u;\n"
542 			<< "    td.value = 0u;\n"
543 			<< ((m_params.fromStage == Stage::TASK)	? m_params.getWriteStatement(valueStr)	: "")
544 			<< ((m_params.toStage == Stage::TASK)	? m_params.getReadStatement("td.value")	: "")
545 			<< "}\n"
546 			;
547 		programCollection.glslSources.add("task") << glu::TaskSource(task.str());
548 	}
549 
550 	{
551 		const bool rwInMesh = m_params.readsOrWritesInMesh();
552 
553 		std::ostringstream mesh;
554 		mesh
555 			<< "#version 450\n"
556 			<< "#extension GL_NV_mesh_shader : enable\n"
557 			<< "\n"
558 			<< "layout(local_size_x=1) in;\n"
559 			<< "layout(triangles) out;\n"
560 			<< "layout(max_vertices=3, max_primitives=1) out;\n"
561 			<< "\n"
562 			<< (needsTaskShader ? "in taskNV TaskData { uint value; } td;\n" : "")
563 			<< "layout (location=0) out perprimitiveNV uint primitiveValue[];\n"
564 			<< "\n"
565 			<< (rwInMesh ? resourceDecl : "")
566 			<< (rwInMesh ? pcDecl : "")
567 			<< "\n"
568 			<< "void main ()\n"
569 			<< "{\n"
570 			<< "    gl_PrimitiveCountNV = 1u;\n"
571 			<< (needsTaskShader ? "    primitiveValue[0] = td.value;\n" : "")
572 			<< ((m_params.fromStage == Stage::MESH)	? m_params.getWriteStatement(valueStr)				: "")
573 			<< ((m_params.toStage == Stage::MESH)	? m_params.getReadStatement("primitiveValue[0]")	: "")
574 			<< "\n"
575 			<< "    gl_MeshVerticesNV[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
576 			<< "    gl_MeshVerticesNV[1].gl_Position = vec4(-1.0,  3.0, 0.0, 1.0);\n"
577 			<< "    gl_MeshVerticesNV[2].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
578 			<< "    gl_PrimitiveIndicesNV[0] = 0;\n"
579 			<< "    gl_PrimitiveIndicesNV[1] = 1;\n"
580 			<< "    gl_PrimitiveIndicesNV[2] = 2;\n"
581 			<< "}\n"
582 			;
583 		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
584 	}
585 
586 	{
587 		const bool			readFromFrag = (m_params.toStage == Stage::FRAG);
588 		std::ostringstream	frag;
589 
590 		frag
591 			<< "#version 450\n"
592 			<< "#extension GL_NV_mesh_shader : enable\n"
593 			<< "\n"
594 			<< "layout (location=0) in perprimitiveNV flat uint primitiveValue;\n"
595 			<< "layout (location=0) out uvec4 outColor;\n"
596 			<< "\n"
597 			<< (readFromFrag ? resourceDecl : "")
598 			<< (readFromFrag ? pcDecl : "")
599 			<< "\n"
600 			<< "void main ()\n"
601 			<< "{\n"
602 			<< "    outColor = uvec4(primitiveValue, 0, 0, 0);\n"
603 			<< "    uint readVal = 0u;\n"
604 			<< (readFromFrag ? m_params.getReadStatement("readVal")	: "")
605 			<< (readFromFrag ? "    outColor = uvec4(readVal, 0, 0, 0);\n"		: "")
606 			<< "}\n"
607 			;
608 		programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
609 	}
610 }
611 
createInstance(Context & context) const612 TestInstance* MeshShaderSyncCase::createInstance (Context& context) const
613 {
614 	return new MeshShaderSyncInstance(context, m_params);
615 }
616 
617 // General description behind these tests.
618 //
619 //	From				To
620 //	==============================
621 //	HOST				TASK			Prepare buffer from host. Only valid for uniform and storage buffers. Read value from task into td.value. Verify color buffer.
622 //	HOST				MESH			Same situation. Read value from mesh into primitiveValue[0]. Verify color buffer.
623 //	TRANSFER			TASK			Prepare auxiliary host-coherent source buffer from host. Copy buffer to buffer or buffer to image. Read from task into td.value. Verify color buffer.
624 //	TRANSFER			MESH			Same initial steps. Read from mesh into primitiveValue[0]. Verify color buffer.
625 //	TASK				MESH			Write value to buffer or image from task shader. Only valid for storage buffers and images. Read from mesh into primitiveValue[0]. Verify color buffer.
626 //	TASK				FRAG			Same write procedure and restrictions. Read from frag into outColor. Verify color buffer.
627 //	TASK				TRANSFER		Same write procedure and restrictions. Prepare auxiliary host-coherent read buffer and copy buffer to buffer or image to buffer. Verify auxiliary buffer.
628 //	TASK				HOST			Due to From/To restrictions, only valid for storage buffers. Same write procedure. Read and verify buffer directly.
629 //	MESH				FRAG			Same as task to frag but the write instructions need to be in the mesh shader.
630 //	MESH				TRANSFER		Same as task to transfer but the write instructions need to be in the mesh shader.
631 //	MESH				HOST			Same as task to host but the write instructions need to be in the mesh shader.
632 //
633 
createCustomRenderPass(const DeviceInterface & vkd,VkDevice device,VkFormat colorFormat,const TestParams & params)634 Move<VkRenderPass> createCustomRenderPass (const DeviceInterface& vkd, VkDevice device, VkFormat colorFormat, const TestParams& params)
635 {
636 	const std::vector<VkAttachmentDescription> attachmentDescs =
637 	{
638 		{
639 			0u,											//	VkAttachmentDescriptionFlags	flags;
640 			colorFormat,								//	VkFormat						format;
641 			VK_SAMPLE_COUNT_1_BIT,						//	VkSampleCountFlagBits			samples;
642 			VK_ATTACHMENT_LOAD_OP_CLEAR,				//	VkAttachmentLoadOp				loadOp;
643 			VK_ATTACHMENT_STORE_OP_STORE,				//	VkAttachmentStoreOp				storeOp;
644 			VK_ATTACHMENT_LOAD_OP_DONT_CARE,			//	VkAttachmentLoadOp				stencilLoadOp;
645 			VK_ATTACHMENT_STORE_OP_DONT_CARE,			//	VkAttachmentStoreOp				stencilStoreOp;
646 			VK_IMAGE_LAYOUT_UNDEFINED,					//	VkImageLayout					initialLayout;
647 			VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,	//	VkImageLayout					finalLayout;
648 		}
649 	};
650 
651 	const std::vector<VkAttachmentReference> attachmentRefs = { { 0u, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL } };
652 
653 	const std::vector<VkSubpassDescription> subpassDescs =
654 	{
655 		{
656 			0u,												//	VkSubpassDescriptionFlags		flags;
657 			VK_PIPELINE_BIND_POINT_GRAPHICS,				//	VkPipelineBindPoint				pipelineBindPoint;
658 			0u,												//	uint32_t						inputAttachmentCount;
659 			nullptr,										//	const VkAttachmentReference*	pInputAttachments;
660 			static_cast<uint32_t>(attachmentRefs.size()),	//	uint32_t						colorAttachmentCount;
661 			de::dataOrNull(attachmentRefs),					//	const VkAttachmentReference*	pColorAttachments;
662 			nullptr,										//	const VkAttachmentReference*	pResolveAttachments;
663 			nullptr,										//	const VkAttachmentReference*	pDepthStencilAttachment;
664 			0u,												//	uint32_t						preserveAttachmentCount;
665 			nullptr,										//	const uint32_t*					pPreserveAttachments;
666 		}
667 	};
668 
669 	// When both stages are shader stages, the dependency will be expressed as a subpass dependency.
670 	std::vector<VkSubpassDependency> dependencies;
671 	if (params.needsSubpassDependency())
672 	{
673 		const VkSubpassDependency dependency =
674 		{
675 			0u,											//	uint32_t				srcSubpass;
676 			0u,											//	uint32_t				dstSubpass;
677 			stageToFlags(params.fromStage),				//	VkPipelineStageFlags	srcStageMask;
678 			stageToFlags(params.toStage),				//	VkPipelineStageFlags	dstStageMask;
679 			writeAccessToFlags(params.writeAccess),		//	VkAccessFlags			srcAccessMask;
680 			readAccessToFlags(params.readAccess),		//	VkAccessFlags			dstAccessMask;
681 			0u,											//	VkDependencyFlags		dependencyFlags;
682 		};
683 		dependencies.push_back(dependency);
684 	}
685 
686 	const VkRenderPassCreateInfo createInfo =
687 	{
688 		VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,		//	VkStructureType					sType;
689 		nullptr,										//	const void*						pNext;
690 		0u,												//	VkRenderPassCreateFlags			flags;
691 		static_cast<uint32_t>(attachmentDescs.size()),	//	uint32_t						attachmentCount;
692 		de::dataOrNull(attachmentDescs),				//	const VkAttachmentDescription*	pAttachments;
693 		static_cast<uint32_t>(subpassDescs.size()),		//	uint32_t						subpassCount;
694 		de::dataOrNull(subpassDescs),					//	const VkSubpassDescription*		pSubpasses;
695 		static_cast<uint32_t>(dependencies.size()),		//	uint32_t						dependencyCount;
696 		de::dataOrNull(dependencies),					//	const VkSubpassDependency*		pDependencies;
697 	};
698 
699 	return createRenderPass(vkd, device, &createInfo);
700 }
701 
hostToTransferMemoryBarrier(const DeviceInterface & vkd,VkCommandBuffer cmdBuffer)702 void hostToTransferMemoryBarrier (const DeviceInterface& vkd, VkCommandBuffer cmdBuffer)
703 {
704 	const auto barrier = makeMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
705 	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 1u, &barrier, 0u, nullptr, 0u, nullptr);
706 }
707 
transferToHostMemoryBarrier(const DeviceInterface & vkd,VkCommandBuffer cmdBuffer)708 void transferToHostMemoryBarrier (const DeviceInterface& vkd, VkCommandBuffer cmdBuffer)
709 {
710 	const auto barrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
711 	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &barrier, 0u, nullptr, 0u, nullptr);
712 }
713 
iterate(void)714 tcu::TestStatus MeshShaderSyncInstance::iterate (void)
715 {
716 	const auto&	vkd						= m_context.getDeviceInterface();
717 	const auto	device					= m_context.getDevice();
718 	auto&		alloc					= m_context.getDefaultAllocator();
719 	const auto	queueIndex				= m_context.getUniversalQueueFamilyIndex();
720 	const auto	queue					= m_context.getUniversalQueue();
721 
722 	const auto	imageFormat				= getImageFormat();
723 	const auto	imageExtent				= getImageExtent();
724 	const auto	colorBufferUsage		= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
725 	const auto	colorSRR				= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
726 	const auto	colorSRL				= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
727 	const auto	bufferSize				= static_cast<VkDeviceSize>(sizeof(m_params.testValue));
728 	const auto	descriptorType			= resourceTypeToDescriptor(m_params.resourceType);
729 	const auto	resourceStages			= m_params.getResourceShaderStages();
730 	const auto	auxiliarBufferUsage		= (VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
731 	const auto	useGeneralLayout		= m_params.preferGeneralLayout();
732 	const bool	needsTwoDrawCalls		= m_params.needsSubpassDependency();
733 
734 	const auto	writeAccessFlags		= writeAccessToFlags(m_params.writeAccess);
735 	const auto	readAccessFlags			= readAccessToFlags(m_params.readAccess);
736 	const auto	fromStageFlags			= stageToFlags(m_params.fromStage);
737 	const auto	toStageFlags			= stageToFlags(m_params.toStage);
738 
739 	// Prepare color buffer.
740 	const VkImageCreateInfo colorBufferCreateInfo =
741 	{
742 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
743 		nullptr,								//	const void*				pNext;
744 		0u,										//	VkImageCreateFlags		flags;
745 		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
746 		imageFormat,							//	VkFormat				format;
747 		imageExtent,							//	VkExtent3D				extent;
748 		1u,										//	uint32_t				mipLevels;
749 		1u,										//	uint32_t				arrayLayers;
750 		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
751 		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
752 		colorBufferUsage,						//	VkImageUsageFlags		usage;
753 		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
754 		0u,										//	uint32_t				queueFamilyIndexCount;
755 		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
756 		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
757 	};
758 	ImageWithMemory	colorBuffer		(vkd, device, alloc, colorBufferCreateInfo, MemoryRequirement::Any);
759 	const auto		colorBufferView	= makeImageView(vkd, device, colorBuffer.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
760 
761 	// Main resource.
762 	using ImageWithMemoryPtr	= de::MovePtr<ImageWithMemory>;
763 	using BufferWithMemoryPtr	= de::MovePtr<BufferWithMemory>;
764 
765 	ImageWithMemoryPtr	imageResource;
766 	Move<VkImageView>	imageResourceView;
767 	VkImageLayout		imageDescriptorLayout	= (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
768 	VkImageLayout		currentLayout			= VK_IMAGE_LAYOUT_UNDEFINED;
769 	BufferWithMemoryPtr	bufferResource;
770 
771 	bool useImageResource	= false;
772 	bool useBufferResource	= false;
773 
774 	switch (m_params.resourceType)
775 	{
776 	case ResourceType::UNIFORM_BUFFER:
777 	case ResourceType::STORAGE_BUFFER:
778 		useBufferResource = true;
779 		break;
780 	case ResourceType::STORAGE_IMAGE:
781 	case ResourceType::SAMPLED_IMAGE:
782 		useImageResource = true;
783 		break;
784 	default:
785 		DE_ASSERT(false);
786 		break;
787 	}
788 
789 	// One resource needed.
790 	DE_ASSERT(useImageResource != useBufferResource);
791 
792 	if (useImageResource)
793 	{
794 		const auto resourceImageUsage = resourceImageUsageFlags(m_params.resourceType);
795 
796 		const VkImageCreateInfo resourceCreateInfo =
797 		{
798 			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
799 			nullptr,								//	const void*				pNext;
800 			0u,										//	VkImageCreateFlags		flags;
801 			VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
802 			imageFormat,							//	VkFormat				format;
803 			imageExtent,							//	VkExtent3D				extent;
804 			1u,										//	uint32_t				mipLevels;
805 			1u,										//	uint32_t				arrayLayers;
806 			VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
807 			VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
808 			resourceImageUsage,						//	VkImageUsageFlags		usage;
809 			VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
810 			0u,										//	uint32_t				queueFamilyIndexCount;
811 			nullptr,								//	const uint32_t*			pQueueFamilyIndices;
812 			VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
813 		};
814 		imageResource		= ImageWithMemoryPtr(new ImageWithMemory(vkd, device, alloc, resourceCreateInfo, MemoryRequirement::Any));
815 		imageResourceView	= makeImageView(vkd, device, imageResource->get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
816 	}
817 	else
818 	{
819 		const auto resourceBufferUsage		= resourceBufferUsageFlags(m_params.resourceType);
820 		const auto resourceBufferCreateInfo	= makeBufferCreateInfo(bufferSize, resourceBufferUsage);
821 		bufferResource = BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, resourceBufferCreateInfo, MemoryRequirement::HostVisible));
822 	}
823 
824 	Move<VkSampler> sampler;
825 	if (descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
826 	{
827 		const VkSamplerCreateInfo samplerCreateInfo =
828 		{
829 			VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,	//	VkStructureType			sType;
830 			nullptr,								//	const void*				pNext;
831 			0u,										//	VkSamplerCreateFlags	flags;
832 			VK_FILTER_NEAREST,						//	VkFilter				magFilter;
833 			VK_FILTER_NEAREST,						//	VkFilter				minFilter;
834 			VK_SAMPLER_MIPMAP_MODE_NEAREST,			//	VkSamplerMipmapMode		mipmapMode;
835 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,	//	VkSamplerAddressMode	addressModeU;
836 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,	//	VkSamplerAddressMode	addressModeV;
837 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,	//	VkSamplerAddressMode	addressModeW;
838 			0.0f,									//	float					mipLodBias;
839 			VK_FALSE,								//	VkBool32				anisotropyEnable;
840 			1.0f,									//	float					maxAnisotropy;
841 			VK_FALSE,								//	VkBool32				compareEnable;
842 			VK_COMPARE_OP_NEVER,					//	VkCompareOp				compareOp;
843 			0.0f,									//	float					minLod;
844 			0.0f,									//	float					maxLod;
845 			VK_BORDER_COLOR_INT_TRANSPARENT_BLACK,	//	VkBorderColor			borderColor;
846 			VK_FALSE,								//	VkBool32				unnormalizedCoordinates;
847 		};
848 		sampler = createSampler(vkd, device, &samplerCreateInfo);
849 	}
850 
851 	// Auxiliary host-coherent buffer for some cases. Being host-coherent lets us avoid extra barriers that would "pollute" synchronization tests.
852 	BufferWithMemoryPtr hostCoherentBuffer;
853 	void*				hostCoherentDataPtr = nullptr;
854 	if (needsAuxiliarBuffer(m_params.fromStage, m_params.toStage))
855 	{
856 		const auto auxiliarBufferCreateInfo = makeBufferCreateInfo(bufferSize, auxiliarBufferUsage);
857 		hostCoherentBuffer	= BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, auxiliarBufferCreateInfo, (MemoryRequirement::HostVisible | MemoryRequirement::Coherent)));
858 		hostCoherentDataPtr	= hostCoherentBuffer->getAllocation().getHostPtr();
859 	}
860 
861 	// Descriptor pool.
862 	Move<VkDescriptorPool> descriptorPool;
863 	{
864 		DescriptorPoolBuilder poolBuilder;
865 		poolBuilder.addType(descriptorType);
866 		descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
867 	}
868 
869 	// Descriptor set layout.
870 	Move<VkDescriptorSetLayout> setLayout;
871 	{
872 		DescriptorSetLayoutBuilder layoutBuilder;
873 		layoutBuilder.addSingleBinding(descriptorType, resourceStages);
874 		setLayout = layoutBuilder.build(vkd, device);
875 	}
876 
877 	// Descriptor set.
878 	const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
879 
880 	// Update descriptor set.
881 	{
882 		DescriptorSetUpdateBuilder	updateBuilder;
883 		const auto					location = DescriptorSetUpdateBuilder::Location::binding(0u);
884 
885 		switch (descriptorType)
886 		{
887 		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
888 		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
889 			{
890 				const auto bufferInfo = makeDescriptorBufferInfo(bufferResource->get(), 0ull, bufferSize);
891 				updateBuilder.writeSingle(descriptorSet.get(), location, descriptorType, &bufferInfo);
892 			}
893 			break;
894 		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
895 		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
896 			{
897 				auto descriptorImageInfo = makeDescriptorImageInfo(sampler.get(), imageResourceView.get(), imageDescriptorLayout);
898 				updateBuilder.writeSingle(descriptorSet.get(), location, descriptorType, &descriptorImageInfo);
899 			}
900 			break;
901 		default:
902 			DE_ASSERT(false); break;
903 		}
904 
905 		updateBuilder.update(vkd, device);
906 	}
907 
908 	// Shader modules.
909 	Move<VkShaderModule> taskShader;
910 	Move<VkShaderModule> meshShader;
911 	Move<VkShaderModule> fragShader;
912 
913 	const auto& binaries = m_context.getBinaryCollection();
914 
915 	if (m_params.needsTask())
916 		taskShader = createShaderModule(vkd, device, binaries.get("task"), 0u);
917 	meshShader = createShaderModule(vkd, device, binaries.get("mesh"), 0u);
918 	fragShader = createShaderModule(vkd, device, binaries.get("frag"), 0u);
919 
920 	using PushConstantStruct = TestParams::PushConstantStruct;
921 
922 	// Pipeline layout, render pass, framebuffer.
923 	const auto pcSize			= static_cast<uint32_t>(sizeof(PushConstantStruct));
924 	const auto pcRange			= makePushConstantRange(resourceStages, 0u, pcSize);
925 	const auto pipelineLayout	= makePipelineLayout(vkd, device, setLayout.get(), &pcRange);
926 	const auto renderPass		= createCustomRenderPass(vkd, device, imageFormat, m_params);
927 	const auto framebuffer		= makeFramebuffer(vkd, device, renderPass.get(), colorBufferView.get(), imageExtent.width, imageExtent.height);
928 
929 	// Pipeline.
930 	std::vector<VkViewport>	viewports	(1u, makeViewport(imageExtent));
931 	std::vector<VkRect2D>	scissors	(1u, makeRect2D(imageExtent));
932 	const auto				pipeline	= makeGraphicsPipeline(vkd, device, pipelineLayout.get(), taskShader.get(), meshShader.get(), fragShader.get(), renderPass.get(), viewports, scissors);
933 
934 	// Command pool and buffer.
935 	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
936 	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
937 	const auto cmdBuffer	= cmdBufferPtr.get();
938 
939 	beginCommandBuffer(vkd, cmdBuffer);
940 
941 	if (m_params.fromStage == Stage::HOST)
942 	{
943 		// Prepare buffer from host when the source stage is the host.
944 		DE_ASSERT(useBufferResource);
945 
946 		auto& resourceBufferAlloc	= bufferResource->getAllocation();
947 		void* resourceBufferDataPtr	= resourceBufferAlloc.getHostPtr();
948 
949 		deMemcpy(resourceBufferDataPtr, &m_params.testValue, sizeof(m_params.testValue));
950 		flushAlloc(vkd, device, resourceBufferAlloc);
951 	}
952 	else if (m_params.fromStage == Stage::TRANSFER)
953 	{
954 		// Put value in host-coherent buffer and transfer it to the resource buffer or image.
955 		deMemcpy(hostCoherentDataPtr, &m_params.testValue, sizeof(m_params.testValue));
956 		hostToTransferMemoryBarrier(vkd, cmdBuffer);
957 
958 		if (useBufferResource)
959 		{
960 			const auto copyRegion = makeBufferCopy(0ull, 0ull, bufferSize);
961 			vkd.cmdCopyBuffer(cmdBuffer, hostCoherentBuffer->get(), bufferResource->get(), 1u, &copyRegion);
962 		}
963 		else
964 		{
965 			// Move image to the right layout for transfer.
966 			const auto newLayout = (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
967 			if (newLayout != currentLayout)
968 			{
969 				const auto preCopyBarrier = makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT, currentLayout, newLayout, imageResource->get(), colorSRR);
970 				vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
971 				currentLayout = newLayout;
972 			}
973 			const auto copyRegion = makeBufferImageCopy(imageExtent, colorSRL);
974 			vkd.cmdCopyBufferToImage(cmdBuffer, hostCoherentBuffer->get(), imageResource->get(), currentLayout, 1u, &copyRegion);
975 		}
976 	}
977 	else if (m_params.fromStage == Stage::TASK || m_params.fromStage == Stage::MESH)
978 	{
979 		// The image or buffer will be written to from shaders. Images need to be in the right layout.
980 		if (useImageResource)
981 		{
982 			const auto newLayout = VK_IMAGE_LAYOUT_GENERAL;
983 			if (newLayout != currentLayout)
984 			{
985 				const auto preWriteBarrier = makeImageMemoryBarrier(0u, (VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT), currentLayout, newLayout, imageResource->get(), colorSRR);
986 				vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, fromStageFlags, 0u, 0u, nullptr, 0u, nullptr, 1u, &preWriteBarrier);
987 				currentLayout = newLayout;
988 			}
989 		}
990 	}
991 	else
992 	{
993 		DE_ASSERT(false);
994 	}
995 
996 	// If the resource is going to be read from shaders, we'll insert the main barrier before running the pipeline.
997 	if (isShaderStage(m_params.toStage) && !needsTwoDrawCalls)
998 	{
999 		if (m_params.barrierType == BarrierType::GENERAL)
1000 		{
1001 			const auto memoryBarrier = makeMemoryBarrier(writeAccessFlags, readAccessFlags);
1002 			vkd.cmdPipelineBarrier(cmdBuffer, fromStageFlags, toStageFlags, 0u, 1u, &memoryBarrier, 0u, nullptr, 0u, nullptr);
1003 		}
1004 		else if (m_params.barrierType == BarrierType::SPECIFIC)
1005 		{
1006 			if (useBufferResource)
1007 			{
1008 				const auto bufferBarrier = makeBufferMemoryBarrier(writeAccessFlags, readAccessFlags, bufferResource->get(), 0ull, bufferSize);
1009 				vkd.cmdPipelineBarrier(cmdBuffer, fromStageFlags, toStageFlags, 0u, 0u, nullptr, 1u, &bufferBarrier, 0u, nullptr);
1010 			}
1011 			else
1012 			{
1013 				const auto newLayout	= (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
1014 				const auto imageBarrier	= makeImageMemoryBarrier(writeAccessFlags, readAccessFlags, currentLayout, newLayout, imageResource->get(), colorSRR);
1015 
1016 				vkd.cmdPipelineBarrier(cmdBuffer, fromStageFlags, toStageFlags, 0u, 0u, nullptr, 0u, nullptr, 1u, &imageBarrier);
1017 				currentLayout = newLayout;
1018 			}
1019 		}
1020 		else
1021 		{
1022 			DE_ASSERT(false);
1023 		}
1024 	}
1025 
1026 	if (needsTwoDrawCalls)
1027 	{
1028 		// Transition image to the general layout before writing to it. When we need two draw calls (because the image will be
1029 		// written to and read from a shader stage), the layout will always be general.
1030 		if (useImageResource)
1031 		{
1032 			const auto newLayout	= VK_IMAGE_LAYOUT_GENERAL;
1033 			const auto imageBarrier	= makeImageMemoryBarrier(0u, writeAccessFlags, currentLayout, newLayout, imageResource->get(), colorSRR);
1034 
1035 			vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, fromStageFlags, 0u, 0u, nullptr, 0u, nullptr, 1u, &imageBarrier);
1036 			currentLayout = newLayout;
1037 		}
1038 	}
1039 
1040 	// Run the pipeline.
1041 	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0), tcu::UVec4(0u));
1042 	vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
1043 	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
1044 	if (needsTwoDrawCalls)
1045 	{
1046 		// The first draw call will write to the resource and the second one will read from the resource.
1047 		PushConstantStruct pcData;
1048 
1049 		pcData.writeVal	= 1u;
1050 		pcData.readVal	= 0u;
1051 
1052 		vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), resourceStages, 0u, pcSize, &pcData);
1053 		vkd.cmdDrawMeshTasksNV(cmdBuffer, 1u, 0u);
1054 
1055 		// Use a barrier between both draw calls. The barrier must be generic because:
1056 		//    * VUID-vkCmdPipelineBarrier-bufferMemoryBarrierCount-01178 forbids using buffer barriers inside render passes.
1057 		//    * VUID-vkCmdPipelineBarrier-image-04073 forbids using image memory barriers inside render passes with resources that are not attachments.
1058 		if (m_params.barrierType == BarrierType::GENERAL)
1059 		{
1060 			const auto memoryBarrier = makeMemoryBarrier(writeAccessFlags, readAccessFlags);
1061 			vkd.cmdPipelineBarrier(cmdBuffer, fromStageFlags, toStageFlags, 0u, 1u, &memoryBarrier, 0u, nullptr, 0u, nullptr);
1062 		}
1063 		else
1064 		{
1065 			DE_ASSERT(false);
1066 		}
1067 
1068 		pcData.writeVal	= 0u;
1069 		pcData.readVal	= 1u;
1070 
1071 		vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), resourceStages, 0u, pcSize, &pcData);
1072 		vkd.cmdDrawMeshTasksNV(cmdBuffer, 1u, 0u);
1073 	}
1074 	else
1075 	{
1076 		PushConstantStruct pcData;
1077 		pcData.writeVal	= 1u;
1078 		pcData.readVal	= 1u;
1079 
1080 		vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), resourceStages, 0u, pcSize, &pcData);
1081 		vkd.cmdDrawMeshTasksNV(cmdBuffer, 1u, 0u);
1082 	}
1083 	endRenderPass(vkd, cmdBuffer);
1084 
1085 	// If the resource was written to from the shaders, insert the main barrier after running the pipeline.
1086 	if (isShaderStage(m_params.fromStage) && !needsTwoDrawCalls)
1087 	{
1088 		if (m_params.barrierType == BarrierType::GENERAL)
1089 		{
1090 			const auto memoryBarrier = makeMemoryBarrier(writeAccessFlags, readAccessFlags);
1091 			vkd.cmdPipelineBarrier(cmdBuffer, fromStageFlags, toStageFlags, 0u, 1u, &memoryBarrier, 0u, nullptr, 0u, nullptr);
1092 		}
1093 		else if (m_params.barrierType == BarrierType::SPECIFIC)
1094 		{
1095 			if (useBufferResource)
1096 			{
1097 				const auto bufferBarrier = makeBufferMemoryBarrier(writeAccessFlags, readAccessFlags, bufferResource->get(), 0ull, bufferSize);
1098 				vkd.cmdPipelineBarrier(cmdBuffer, fromStageFlags, toStageFlags, 0u, 0u, nullptr, 1u, &bufferBarrier, 0u, nullptr);
1099 			}
1100 			else
1101 			{
1102 				// Note: the image will only be read from shader stages (which is covered in BarrierType::DEPENDENCY) or from the transfer stage.
1103 				const auto newLayout	= (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
1104 				const auto imageBarrier	= makeImageMemoryBarrier(writeAccessFlags, readAccessFlags, currentLayout, newLayout, imageResource->get(), colorSRR);
1105 
1106 				vkd.cmdPipelineBarrier(cmdBuffer, fromStageFlags, toStageFlags, 0u, 0u, nullptr, 0u, nullptr, 1u, &imageBarrier);
1107 				currentLayout = newLayout;
1108 			}
1109 		}
1110 		// For subpass dependencies, they have already been included in the render pass.
1111 	}
1112 
1113 	// Read resource from the destination stage if needed.
1114 	if (m_params.toStage == Stage::HOST)
1115 	{
1116 		// Nothing to do. The test value should be in the resource buffer already, which is host-visible.
1117 	}
1118 	else if (m_params.toStage == Stage::TRANSFER)
1119 	{
1120 		// Copy value from resource to host-coherent buffer to be verified later.
1121 		if (useBufferResource)
1122 		{
1123 			const auto copyRegion = makeBufferCopy(0ull, 0ull, bufferSize);
1124 			vkd.cmdCopyBuffer(cmdBuffer, bufferResource->get(), hostCoherentBuffer->get(), 1u, &copyRegion);
1125 		}
1126 		else
1127 		{
1128 			const auto copyRegion = makeBufferImageCopy(imageExtent, colorSRL);
1129 			vkd.cmdCopyImageToBuffer(cmdBuffer, imageResource->get(), currentLayout, hostCoherentBuffer->get(), 1u, &copyRegion);
1130 		}
1131 
1132 		transferToHostMemoryBarrier(vkd, cmdBuffer);
1133 	}
1134 
1135 	// If the output value will be available in the color buffer, take the chance to transfer its contents to a host-coherent buffer.
1136 	BufferWithMemoryPtr colorVerificationBuffer;
1137 	void*				colorVerificationDataPtr = nullptr;
1138 
1139 	if (valueInColorBuffer(m_params.toStage))
1140 	{
1141 		const auto auxiliarBufferCreateInfo = makeBufferCreateInfo(bufferSize, auxiliarBufferUsage);
1142 		colorVerificationBuffer		= BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, auxiliarBufferCreateInfo, (MemoryRequirement::HostVisible | MemoryRequirement::Coherent)));
1143 		colorVerificationDataPtr	= colorVerificationBuffer->getAllocation().getHostPtr();
1144 
1145 		const auto srcAccess	= (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
1146 		const auto dstAccess	= VK_ACCESS_TRANSFER_READ_BIT;
1147 		const auto colorBarrier	= makeImageMemoryBarrier(srcAccess, dstAccess, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorBuffer.get(), colorSRR);
1148 		vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &colorBarrier);
1149 
1150 		const auto copyRegion = makeBufferImageCopy(imageExtent, colorSRL);
1151 		vkd.cmdCopyImageToBuffer(cmdBuffer, colorBuffer.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorVerificationBuffer->get(), 1u, &copyRegion);
1152 
1153 		transferToHostMemoryBarrier(vkd, cmdBuffer);
1154 	}
1155 
1156 
1157 	endCommandBuffer(vkd, cmdBuffer);
1158 	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
1159 
1160 	// Verify output resources as needed.
1161 
1162 	if (valueInAuxiliarDestBuffer(m_params.toStage))
1163 	{
1164 		uint32_t bufferValue;
1165 		deMemcpy(&bufferValue, hostCoherentDataPtr, sizeof(bufferValue));
1166 
1167 		if (bufferValue != m_params.testValue)
1168 		{
1169 			std::ostringstream msg;
1170 			msg << "Unexpected value in auxiliar host-coherent buffer: found " << bufferValue << " and expected " << m_params.testValue;
1171 			TCU_FAIL(msg.str());
1172 		}
1173 	}
1174 
1175 	if (valueInResourceBuffer(m_params.toStage))
1176 	{
1177 		auto&		resourceBufferAlloc		= bufferResource->getAllocation();
1178 		void*		resourceBufferDataPtr	= resourceBufferAlloc.getHostPtr();
1179 		uint32_t	bufferValue;
1180 
1181 		invalidateAlloc(vkd, device, resourceBufferAlloc);
1182 		deMemcpy(&bufferValue, resourceBufferDataPtr, sizeof(bufferValue));
1183 
1184 		if (bufferValue != m_params.testValue)
1185 		{
1186 			std::ostringstream msg;
1187 			msg << "Unexpected value in resource buffer: found " << bufferValue << " and expected " << m_params.testValue;
1188 			TCU_FAIL(msg.str());
1189 		}
1190 	}
1191 
1192 	if (valueInColorBuffer(m_params.toStage))
1193 	{
1194 		uint32_t bufferValue;
1195 		deMemcpy(&bufferValue, colorVerificationDataPtr, sizeof(bufferValue));
1196 
1197 		if (bufferValue != m_params.testValue)
1198 		{
1199 			std::ostringstream msg;
1200 			msg << "Unexpected value in color verification buffer: found " << bufferValue << " and expected " << m_params.testValue;
1201 			TCU_FAIL(msg.str());
1202 		}
1203 	}
1204 
1205 	return tcu::TestStatus::pass("Pass");
1206 }
1207 
1208 } // anonymous
1209 
createMeshShaderSyncTests(tcu::TestContext & testCtx)1210 tcu::TestCaseGroup* createMeshShaderSyncTests (tcu::TestContext& testCtx)
1211 {
1212 	const struct
1213 	{
1214 		Stage		fromStage;
1215 		Stage		toStage;
1216 	} stageCombinations[] =
1217 	{
1218 		// Combinations where the source and destination stages involve mesh shaders.
1219 		// Note: this could be tested procedurally.
1220 		{	Stage::HOST,		Stage::TASK			},
1221 		{	Stage::HOST,		Stage::MESH			},
1222 		{	Stage::TRANSFER,	Stage::TASK			},
1223 		{	Stage::TRANSFER,	Stage::MESH			},
1224 		{	Stage::TASK,		Stage::MESH			},
1225 		{	Stage::TASK,		Stage::FRAG			},
1226 		{	Stage::TASK,		Stage::TRANSFER		},
1227 		{	Stage::TASK,		Stage::HOST			},
1228 		{	Stage::MESH,		Stage::FRAG			},
1229 		{	Stage::MESH,		Stage::TRANSFER		},
1230 		{	Stage::MESH,		Stage::HOST			},
1231 	};
1232 
1233 	const struct
1234 	{
1235 		ResourceType	resourceType;
1236 		const char*		name;
1237 	} resourceTypes[] =
1238 	{
1239 		{ ResourceType::UNIFORM_BUFFER,	"uniform_buffer"	},
1240 		{ ResourceType::STORAGE_BUFFER,	"storage_buffer"	},
1241 		{ ResourceType::STORAGE_IMAGE,	"storage_image"		},
1242 		{ ResourceType::SAMPLED_IMAGE,	"sampled_image"		},
1243 	};
1244 
1245 	const struct
1246 	{
1247 		BarrierType		barrierType;
1248 		const char*		name;
1249 	} barrierTypes[] =
1250 	{
1251 		{	BarrierType::GENERAL,		"memory_barrier"		},
1252 		{	BarrierType::SPECIFIC,		"specific_barrier"		},
1253 	};
1254 
1255 	const struct
1256 	{
1257 		WriteAccess		writeAccess;
1258 		const char*		name;
1259 	} writeAccesses[] =
1260 	{
1261 		{	WriteAccess::HOST_WRITE,		"host_write"		},
1262 		{	WriteAccess::TRANSFER_WRITE,	"transfer_write"	},
1263 		{	WriteAccess::SHADER_WRITE,		"shader_write"		},
1264 	};
1265 
1266 	const struct
1267 	{
1268 		ReadAccess		readAccess;
1269 		const char*		name;
1270 	} readAccesses[] =
1271 	{
1272 		{	ReadAccess::HOST_READ,		"host_read"		},
1273 		{	ReadAccess::TRANSFER_READ,	"transfer_read"	},
1274 		{	ReadAccess::SHADER_READ,	"shader_read"	},
1275 		{	ReadAccess::UNIFORM_READ,	"uniform_read"	},
1276 	};
1277 
1278 	uint32_t testValue = 1628510124u;
1279 
1280 	GroupPtr mainGroup (new tcu::TestCaseGroup(testCtx, "synchronization", "Mesh Shader synchronization tests"));
1281 
1282 	for (const auto& stageCombination : stageCombinations)
1283 	{
1284 		const std::string	combinationName		= de::toString(stageCombination.fromStage) + "_to_" + de::toString(stageCombination.toStage);
1285 		GroupPtr			combinationGroup	(new tcu::TestCaseGroup(testCtx, combinationName.c_str(), ""));
1286 
1287 		for (const auto& resourceCase : resourceTypes)
1288 		{
1289 			if (!canWriteTo(stageCombination.fromStage, resourceCase.resourceType))
1290 				continue;
1291 
1292 			if (!canReadFrom(stageCombination.toStage, resourceCase.resourceType))
1293 				continue;
1294 
1295 			GroupPtr resourceGroup (new tcu::TestCaseGroup(testCtx, resourceCase.name, ""));
1296 
1297 			for (const auto& barrierCase : barrierTypes)
1298 			{
1299 				// See note above about VUID-vkCmdPipelineBarrier-bufferMemoryBarrierCount-01178 and VUID-vkCmdPipelineBarrier-image-04073.
1300 				if (readAndWriteFromShaders(stageCombination.fromStage, stageCombination.toStage) && barrierCase.barrierType == BarrierType::SPECIFIC)
1301 					continue;
1302 
1303 				GroupPtr barrierGroup (new tcu::TestCaseGroup(testCtx, barrierCase.name, ""));
1304 
1305 				for (const auto& writeCase	: writeAccesses)
1306 				for (const auto& readCase	: readAccesses)
1307 				{
1308 					if (!canReadResourceAsAccess(resourceCase.resourceType, readCase.readAccess))
1309 						continue;
1310 					if (!canWriteResourceAsAccess(resourceCase.resourceType, writeCase.writeAccess))
1311 						continue;
1312 					if (!canReadFromStageAsAccess(stageCombination.toStage, readCase.readAccess))
1313 						continue;
1314 					if (!canWriteFromStageAsAccess(stageCombination.fromStage, writeCase.writeAccess))
1315 						continue;
1316 
1317 					const std::string accessCaseName = writeCase.name + std::string("_") + readCase.name;
1318 
1319 					const TestParams testParams =
1320 					{
1321 						stageCombination.fromStage,	//	Stage			fromStage;
1322 						stageCombination.toStage,	//	Stage			toStage;
1323 						resourceCase.resourceType,	//	ResourceType	resourceType;
1324 						barrierCase.barrierType,	//	BarrierType		barrierType;
1325 						writeCase.writeAccess,		//	WriteAccess		writeAccess;
1326 						readCase.readAccess,		//	ReadAccess		readAccess;
1327 						testValue++,				//	uint32_t		testValue;
1328 					};
1329 
1330 					barrierGroup->addChild(new MeshShaderSyncCase(testCtx, accessCaseName, "", testParams));
1331 				}
1332 
1333 				resourceGroup->addChild(barrierGroup.release());
1334 			}
1335 
1336 			combinationGroup->addChild(resourceGroup.release());
1337 		}
1338 
1339 		mainGroup->addChild(combinationGroup.release());
1340 	}
1341 
1342 	return mainGroup.release();
1343 }
1344 
1345 } // MeshShader
1346 } // vkt
1347