• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 Advanced Micro Devices, Inc.
6  * Copyright (c) 2019 The Khronos Group Inc.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Tests for VK_AMD_buffer_marker
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktApiBufferMarkerTests.hpp"
26 #include "vktTestCase.hpp"
27 #include "vktTestCaseUtil.hpp"
28 #include "vktTestGroupUtil.hpp"
29 #include "vktExternalMemoryUtil.hpp"
30 #include "vktCustomInstancesDevices.hpp"
31 #include "vkPlatform.hpp"
32 #include "vkCmdUtil.hpp"
33 #include "vkObjUtil.hpp"
34 #include "vkMemUtil.hpp"
35 #include "vkQueryUtil.hpp"
36 #include "vkRefUtil.hpp"
37 #include "vkBuilderUtil.hpp"
38 #include "tcuCommandLine.hpp"
39 #include "deUniquePtr.hpp"
40 #include "deRandom.hpp"
41 #include "deSTLUtil.hpp"
42 
43 #include <vector>
44 
45 namespace vkt
46 {
47 namespace api
48 {
49 namespace
50 {
51 using namespace vk;
52 using de::UniquePtr;
53 using de::MovePtr;
54 using namespace vkt::ExternalMemoryUtil;
55 
//! Common test data related to the device
struct WorkingDevice
{
	Move<VkDevice>          logicalDevice;		// Custom device created with the extension(s) the test case needs
	MovePtr<DeviceDriver>   deviceDriver;		// Function table for logicalDevice
	MovePtr<Allocator>      allocator;			// Memory allocator bound to logicalDevice
	VkQueue                 queue;				// Queue retrieved from the family selected for the test case
	deUint32                queueFamilyIdx;		// Index of the selected queue family
	VkQueueFamilyProperties queueProps;			// Properties of the selected queue family
};
66 
queueFamilyMatchesTestCase(const VkQueueFamilyProperties & props,VkQueueFlagBits testQueue)67 bool queueFamilyMatchesTestCase(const VkQueueFamilyProperties& props, VkQueueFlagBits testQueue)
68 {
69 	// The goal is to find a queue family that most accurately represents the required queue flag.  For example, if flag is
70 	// VK_QUEUE_TRANSFER_BIT, we want to target transfer-only queues for such a test case rather than universal queues which
71 	// may include VK_QUEUE_TRANSFER_BIT along with other queue flags.
72 	const VkQueueFlags flags = props.queueFlags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT);
73 
74 	// for VK_QUEUE_TRANSFER_BIT, target transfer-only queues:
75 	if (testQueue == VK_QUEUE_TRANSFER_BIT)
76 		return (flags == VK_QUEUE_TRANSFER_BIT);
77 
78 	// for VK_QUEUE_COMPUTE_BIT, target compute only queues
79 	if (testQueue == VK_QUEUE_COMPUTE_BIT)
80 		return ((flags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) == VK_QUEUE_COMPUTE_BIT);
81 
82 	// for VK_QUEUE_GRAPHICS_BIT, target universal queues (queues which support graphics)
83 	if (testQueue == VK_QUEUE_GRAPHICS_BIT)
84 		return ((flags & VK_QUEUE_GRAPHICS_BIT) != 0);
85 
86 	DE_FATAL("Unexpected test queue flag");
87 
88 	return false;
89 }
90 
// We create a custom device because we don't want to always use the universal queue.
// Fills "wd" with a device enabling VK_AMD_buffer_marker (plus VK_EXT_external_memory_host
// when "hostPtr" is set), a queue from a family matching "testQueue", and an allocator
// that offsets suballocations by "offset".  Throws NotSupportedError when no suitable
// queue family exists.
void createDeviceWithExtension (Context& context, WorkingDevice& wd, VkQueueFlagBits testQueue, bool hostPtr, size_t offset)
{
	const PlatformInterface&	vkp				= context.getPlatformInterface();
	const VkInstance			instance		= context.getInstance();
	const InstanceInterface&	instanceDriver	= context.getInstanceInterface();
	const VkPhysicalDevice		physicalDevice	= context.getPhysicalDevice();
	const auto					useValidation	= context.getTestContext().getCommandLine().isValidationEnabled();

	// Create a device with extension enabled and a queue with a family which supports the buffer marker extension
	const std::vector<VkQueueFamilyProperties>	queueFamilyProperties	= getPhysicalDeviceQueueFamilyProperties(instanceDriver, physicalDevice);
	const float									queuePriority			= 1.0f;
	VkDeviceQueueCreateInfo						queueCreateInfo;
	deMemset(&queueCreateInfo, 0, sizeof(queueCreateInfo));

	// Pick the first family that best matches the requested queue type
	// (see queueFamilyMatchesTestCase for the matching policy).
	for (deUint32 familyIdx = 0; familyIdx < queueFamilyProperties.size(); ++familyIdx)
	{
		if (queueFamilyMatchesTestCase(queueFamilyProperties[familyIdx], testQueue) &&
			queueFamilyProperties[familyIdx].queueCount > 0)
		{
			queueCreateInfo.sType				= VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
			queueCreateInfo.pNext				= DE_NULL;
			queueCreateInfo.pQueuePriorities	= &queuePriority;
			queueCreateInfo.queueCount			= 1;
			queueCreateInfo.queueFamilyIndex	= familyIdx;

			break;
		}
	}

	// queueCount is still 0 (from the memset above) iff the loop found no match.
	if (queueCreateInfo.queueCount == 0)
	{
		TCU_THROW(NotSupportedError, "No compatible queue family for this test case");
	}

	std::vector<const char*> cstrDeviceExtensions;

	cstrDeviceExtensions.push_back("VK_AMD_buffer_marker");

	// Host-pointer variants additionally import buffer memory from a host allocation.
	if (hostPtr)
		cstrDeviceExtensions.push_back("VK_EXT_external_memory_host");

	const VkDeviceCreateInfo deviceInfo =
	{
		VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,				// VkStructureType					sType;
		DE_NULL,											// const void*						pNext;
		0u,													// VkDeviceCreateFlags				flags;
		1,													// deUint32							queueCreateInfoCount;
		&queueCreateInfo,									// const VkDeviceQueueCreateInfo*	pQueueCreateInfos;
		0u,													// deUint32							enabledLayerCount;
		DE_NULL,											// const char* const*				ppEnabledLayerNames;
		static_cast<deUint32>(cstrDeviceExtensions.size()),	// deUint32							enabledExtensionCount;
		de::dataOrNull(cstrDeviceExtensions),				// const char* const*				ppEnabledExtensionNames;
		&context.getDeviceFeatures(),						// const VkPhysicalDeviceFeatures*	pEnabledFeatures;
	};

	wd.logicalDevice	= createCustomDevice(useValidation, vkp, instance, instanceDriver, physicalDevice, &deviceInfo);
	wd.deviceDriver		= MovePtr<DeviceDriver>(new DeviceDriver(vkp, instance, *wd.logicalDevice, context.getUsedApiVersion()));
	// The allocator offsets suballocations by "offset", rounded via nonCoherentAtomSize.
	const SimpleAllocator::OptionalOffsetParams offsetParams({ context.getDeviceProperties().limits.nonCoherentAtomSize, static_cast<VkDeviceSize>(offset) });
	wd.allocator		= MovePtr<Allocator>(new SimpleAllocator(*wd.deviceDriver, *wd.logicalDevice, getPhysicalDeviceMemoryProperties(instanceDriver, physicalDevice), offsetParams));
	wd.queueFamilyIdx	= queueCreateInfo.queueFamilyIndex;
	wd.queue			= getDeviceQueue(*wd.deviceDriver, *wd.logicalDevice, wd.queueFamilyIdx, 0u);
	wd.queueProps		= queueFamilyProperties[queueCreateInfo.queueFamilyIndex];
}
155 
writeHostMemory(const vk::DeviceInterface & vkd,vk::VkDevice device,vk::VkDeviceMemory memory,size_t size,size_t memorySize,const void * data)156 void writeHostMemory (const vk::DeviceInterface&	vkd,
157 					  vk::VkDevice					device,
158 					  vk::VkDeviceMemory			memory,
159 					  size_t						size,
160 					  size_t						memorySize,
161 					  const void*					data)
162 {
163 	void* const ptr = vk::mapMemory(vkd, device, memory, 0, memorySize, 0);
164 
165 	deMemcpy(ptr, data, size);
166 
167 	flushMappedMemoryRange(vkd, device, memory, 0, memorySize);
168 
169 	vkd.unmapMemory(device, memory);
170 }
171 
invalidateHostMemory(const vk::DeviceInterface & vkd,vk::VkDevice device,vk::VkDeviceMemory memory,size_t size)172 void invalidateHostMemory (const vk::DeviceInterface&	vkd,
173 						   vk::VkDevice					device,
174 						   vk::VkDeviceMemory			memory,
175 						   size_t						size)
176 {
177 	vk::mapMemory(vkd, device, memory, 0, size, 0);
178 
179 	invalidateMappedMemoryRange(vkd, device, memory, 0, size);
180 
181 	vkd.unmapMemory(device, memory);
182 }
183 
checkMarkerBuffer(const DeviceInterface & vk,VkDevice device,const MovePtr<vk::Allocation> & memory,const std::vector<deUint32> & expected,size_t size,bool useHostMemory)184 bool checkMarkerBuffer	(const DeviceInterface& vk, VkDevice device, const MovePtr<vk::Allocation>& memory,
185 						 const std::vector<deUint32>& expected, size_t size, bool useHostMemory)
186 {
187 	if (useHostMemory)
188 	{
189 		invalidateHostMemory(vk, device, memory->getMemory(), size);
190 	}
191 	else
192 	{
193 		invalidateAlloc(vk, device, *memory);
194 	}
195 
196 	const deUint32* data = reinterpret_cast<const deUint32*>(static_cast<const char*>(memory->getHostPtr()));
197 
198 	for (size_t i = 0; i < expected.size(); ++i)
199 	{
200 		if (data[i] != expected[i])
201 			return false;
202 	}
203 
204 	return true;
205 }
206 
// Parameters shared by all buffer-marker test cases.
struct BaseTestParams
{
	VkQueueFlagBits			testQueue;	// Queue type that this test case targets
	VkPipelineStageFlagBits stage;		// Pipeline stage where any marker writes for this test case occur in
	deUint32				size;		// Number of buffer markers (deUint32 slots in the marker buffer)
	bool					useHostPtr;	// Whether to use host pointer as backing buffer memory (VK_EXT_external_memory_host)
	size_t					offset;		// The offset of the data in the buffer
};
215 
// Selects a memory type index usable for importing "hostMemory->data" as
// external host memory.  "allowedBits" starts as the buffer's memoryTypeBits
// mask and is further restricted by the host pointer's supported types when
// the query succeeds.
deUint32 chooseExternalMarkerMemoryType(const DeviceInterface&				vkd,
										VkDevice							device,
										VkExternalMemoryHandleTypeFlagBits	externalType,
										deUint32							allowedBits,
										MovePtr<ExternalHostMemory>&		hostMemory)
{
	VkMemoryHostPointerPropertiesEXT props =
	{
		vk::VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
		DE_NULL,
		0u,
	};

	// Restrict to types that can import this host pointer; on query failure the
	// buffer's own mask is used unchanged (best effort).
	if (vkd.getMemoryHostPointerPropertiesEXT(device, externalType, hostMemory->data, &props) == VK_SUCCESS)
	{
		allowedBits &= props.memoryTypeBits;
	}

	// Index of the lowest set bit.  NOTE(review): the caller treats
	// VK_MAX_MEMORY_TYPES as "no compatible type" -- presumably that is what
	// deInt32BitScan yields when allowedBits is 0; confirm against deInt32.h.
	return deInt32BitScan((deInt32 *)&allowedBits);
}
236 
// Allocation wrapper for a VkDeviceMemory imported from a host pointer.  Owns
// only the VkDeviceMemory handle (released on destruction); the underlying
// host allocation is owned by the caller's ExternalHostMemory object.
class ExternalHostAllocation : public Allocation
{
public:
	// "hostPtr" is the caller-visible pointer into the host allocation at "offset".
	ExternalHostAllocation(Move<VkDeviceMemory> mem, void* hostPtr, size_t offset) : Allocation(*mem, offset, hostPtr), m_memHolder(mem) { }

private:
	const Unique<VkDeviceMemory>	m_memHolder;
};
245 
// Allocates backing memory for the marker buffer and binds it.  When
// "externalHostPtr" is false the regular allocator path is used; otherwise a
// host allocation is created (returned in "hostMemory"), imported via
// VK_EXT_external_memory_host and wrapped in an ExternalHostAllocation
// (returned in "deviceMemory").  Fails the test when no compatible external
// memory type exists.
void createMarkerBufferMemory(const InstanceInterface&		vki,
							const DeviceInterface&			vkd,
							VkPhysicalDevice				physicalDevice,
							VkDevice						device,
							VkBuffer						buffer,
							size_t							bufferOffset,
							MovePtr<Allocator>&				allocator,
							const MemoryRequirement			allocRequirement,
							bool							externalHostPtr,
							MovePtr<ExternalHostMemory>&	hostMemory,
							MovePtr<Allocation>&			deviceMemory)
{
	VkMemoryRequirements memReqs = getBufferMemoryRequirements(vkd, device, buffer);

	if (externalHostPtr == false)
	{
		// Regular path: the allocator already applies the configured offset.
		deviceMemory = allocator->allocate(memReqs, allocRequirement);
	}
	else
	{
		const VkExternalMemoryHandleTypeFlagBits externalType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;

		const VkPhysicalDeviceExternalMemoryHostPropertiesEXT hostProps = getPhysicalDeviceExternalMemoryHostProperties(vki, physicalDevice);
		// Round the requested offset up to the buffer's alignment requirement and
		// make the host allocation large enough to hold offset + buffer.
		bufferOffset = deAlignSize(bufferOffset, static_cast<size_t>(memReqs.alignment));
		hostMemory = MovePtr<ExternalHostMemory>(new ExternalHostMemory(memReqs.size + bufferOffset, hostProps.minImportedHostPointerAlignment));

		const deUint32 externalMemType = chooseExternalMarkerMemoryType(vkd, device, externalType, memReqs.memoryTypeBits, hostMemory);

		if (externalMemType == VK_MAX_MEMORY_TYPES)
		{
			TCU_FAIL("Failed to find compatible external host memory type for marker buffer");
		}

		// Import the whole host allocation as device memory.
		const VkImportMemoryHostPointerInfoEXT	importInfo =
		{
			VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
			DE_NULL,
			externalType,
			hostMemory->data
		};

		const VkMemoryAllocateInfo				info =
		{
			VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
			(const void*)&importInfo,
			hostMemory->size,
			externalMemType
		};

		// The Allocation's host pointer is advanced by bufferOffset so callers
		// read/write the buffer region directly.
		deviceMemory = MovePtr<Allocation>(new ExternalHostAllocation(allocateMemory(vkd, device, &info),
																	  (((deUint8 *)hostMemory->data) + bufferOffset),
																	  bufferOffset));
	}

	// Bind at the allocation's offset (0 for the external path's memory object
	// is not used; getOffset() carries bufferOffset there).
	VK_CHECK(vkd.bindBufferMemory(device, buffer, deviceMemory->getMemory(), deviceMemory->getOffset()));
}
302 
// Records one vkCmdWriteBufferMarkerAMD per marker slot, in order, and then
// verifies on the host that every slot holds the value that was written.
// The buffer is pre-seeded from the host with the same values before recording.
tcu::TestStatus bufferMarkerSequential(Context& context, BaseTestParams params)
{
	WorkingDevice wd;

	createDeviceWithExtension(context, wd, params.testQueue, params.useHostPtr, params.offset);

	const DeviceInterface&			vk(*wd.deviceDriver);
	const VkDevice					device(*wd.logicalDevice);
	const VkDeviceSize				markerBufferSize(params.size * sizeof(deUint32));
	// Chained into the buffer create info only for the host-pointer variant.
	VkExternalMemoryBufferCreateInfo	externalMemoryBufferCreateInfo =
	{
		VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
		DE_NULL,
		VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT
	};
	VkBufferCreateInfo				bufferCreateInfo = makeBufferCreateInfo(markerBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
	if (params.useHostPtr)
		bufferCreateInfo.pNext = &externalMemoryBufferCreateInfo;
	Move<VkBuffer>					markerBuffer(createBuffer(vk, device, &bufferCreateInfo));
	MovePtr<ExternalHostMemory>		hostMemory;
	MovePtr<Allocation>				markerMemory;

	createMarkerBufferMemory(context.getInstanceInterface(), vk, context.getPhysicalDevice(), device,
							 *markerBuffer, params.offset, wd.allocator, MemoryRequirement::HostVisible, params.useHostPtr, hostMemory, markerMemory);

	// Deterministic per-size seed so each variant gets a distinct value stream.
	de::Random						rng(12345 ^ params.size);
	std::vector<deUint32>			expected(params.size);

	for (size_t i = 0; i < params.size; ++i)
		expected[i] = rng.getUint32();

	// Pre-fill the marker buffer with the expected values from the host.
	if (params.useHostPtr)
	{
		writeHostMemory(vk, device, markerMemory->getMemory(), static_cast<size_t>(markerBufferSize), hostMemory->size, &expected[0]);
	}
	else
	{
		deMemcpy(markerMemory->getHostPtr(), &expected[0], static_cast<size_t>(markerBufferSize));
		flushMappedMemoryRange(vk, device, markerMemory->getMemory(), markerMemory->getOffset(), VK_WHOLE_SIZE);
	}

	const Unique<VkCommandPool>		cmdPool(createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, wd.queueFamilyIdx));
	const Unique<VkCommandBuffer>	cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

	beginCommandBuffer(vk, *cmdBuffer);

	// One marker write per slot, at the stage under test.
	for (size_t i = 0; i < params.size; ++i)
	{
		vk.cmdWriteBufferMarkerAMD(*cmdBuffer, params.stage, *markerBuffer, static_cast<VkDeviceSize>(sizeof(deUint32) * i), expected[i]);
	}

	// Marker writes count as transfer writes; make them visible to host reads.
	const VkMemoryBarrier memoryDep =
	{
		VK_STRUCTURE_TYPE_MEMORY_BARRIER,
		DE_NULL,
		VK_ACCESS_TRANSFER_WRITE_BIT,
		VK_ACCESS_HOST_READ_BIT,
	};

	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 1, &memoryDep, 0, DE_NULL, 0, DE_NULL);

	VK_CHECK(vk.endCommandBuffer(*cmdBuffer));

	submitCommandsAndWait(vk, device, wd.queue, *cmdBuffer);

	if (!checkMarkerBuffer(vk, device, markerMemory, expected, params.useHostPtr ? hostMemory->size : 0, params.useHostPtr))
		return tcu::TestStatus::fail("Some marker values were incorrect");

	return tcu::TestStatus::pass("Pass");
}
373 
// Records many marker writes to randomly chosen slots (10x the slot count) so
// slots get overwritten repeatedly; the host-side "expected" array mirrors the
// last value written to each slot, which is what must land in the buffer.
tcu::TestStatus bufferMarkerOverwrite(Context& context, BaseTestParams params)
{
	WorkingDevice wd;

	createDeviceWithExtension(context, wd, params.testQueue, params.useHostPtr, params.offset);

	const DeviceInterface&				vk(*wd.deviceDriver);
	const VkDevice						device(*wd.logicalDevice);
	const VkDeviceSize					markerBufferSize(params.size * sizeof(deUint32));
	// Chained into the buffer create info only for the host-pointer variant.
	VkExternalMemoryBufferCreateInfo	externalMemoryBufferCreateInfo =
	{
		VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
		DE_NULL,
		VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT
	};
	VkBufferCreateInfo					bufferCreateInfo	= makeBufferCreateInfo(markerBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
	if (params.useHostPtr)
		bufferCreateInfo.pNext = &externalMemoryBufferCreateInfo;

	Move<VkBuffer>						markerBuffer		(createBuffer(vk, device, &bufferCreateInfo));
	MovePtr<ExternalHostMemory>			hostMemory;
	MovePtr<Allocation>					markerMemory;

	createMarkerBufferMemory(context.getInstanceInterface(), vk, context.getPhysicalDevice(), device,
							 *markerBuffer, params.offset, wd.allocator, MemoryRequirement::HostVisible, params.useHostPtr, hostMemory, markerMemory);

	de::Random						rng(12345 ^ params.size);
	std::vector<deUint32>			expected(params.size);

	// Zero-initialize both the reference array and (below) the buffer itself.
	for (size_t i = 0; i < params.size; ++i)
		expected[i] = 0;

	if (params.useHostPtr)
	{
		writeHostMemory(vk, device, markerMemory->getMemory(), static_cast<size_t>(markerBufferSize), hostMemory->size, &expected[0]);
	}
	else
	{
		deMemcpy(markerMemory->getHostPtr(), &expected[0], static_cast<size_t>(markerBufferSize));
		flushMappedMemoryRange(vk, device, markerMemory->getMemory(), markerMemory->getOffset(), VK_WHOLE_SIZE);
	}

	const Unique<VkCommandPool>		cmdPool(createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, wd.queueFamilyIdx));
	const Unique<VkCommandBuffer>	cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

	beginCommandBuffer(vk, *cmdBuffer);

	// Markers must land in submission order, so the last write to a slot wins;
	// using the loop counter as the value makes out-of-order writes detectable.
	for (deUint32 i = 0; i < params.size * 10; ++i)
	{
		const deUint32 slot  = rng.getUint32() % static_cast<deUint32>(params.size);
		const deUint32 value = i;

		expected[slot] = value;

		vk.cmdWriteBufferMarkerAMD(*cmdBuffer, params.stage, *markerBuffer, static_cast<VkDeviceSize>(sizeof(deUint32) * slot), expected[slot]);
	}

	// Marker writes count as transfer writes; make them visible to host reads.
	const VkMemoryBarrier memoryDep = {
		VK_STRUCTURE_TYPE_MEMORY_BARRIER,
		DE_NULL,
		VK_ACCESS_TRANSFER_WRITE_BIT,
		VK_ACCESS_HOST_READ_BIT,
	};

	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 1, &memoryDep, 0, DE_NULL, 0, DE_NULL);

	VK_CHECK(vk.endCommandBuffer(*cmdBuffer));

	submitCommandsAndWait(vk, device, wd.queue, *cmdBuffer);

	if (!checkMarkerBuffer(vk, device, markerMemory, expected, params.useHostPtr ? hostMemory->size : 0, params.useHostPtr))
		return tcu::TestStatus::fail("Some marker values were incorrect");

	return tcu::TestStatus::pass("Pass");
}
449 
// How the non-marker writes to the shared buffer are produced in the memory
// dependency tests (see computeMemoryDepBarrier for the matching barrier masks).
enum MemoryDepMethod
{
	MEMORY_DEP_DRAW,		// Shader writes during a draw (fragment stage)
	MEMORY_DEP_DISPATCH,	// Shader writes during a compute dispatch
	MEMORY_DEP_COPY			// Transfer writes via a copy command
};

struct MemoryDepParams
{
	BaseTestParams			base;	// Common marker-test parameters
	MemoryDepMethod			method;	// Kind of non-marker write interleaved with marker writes
};

// Identifies which kind of operation last wrote a buffer slot.
enum MemoryDepOwner
{
	MEMORY_DEP_OWNER_NOBODY = 0,
	MEMORY_DEP_OWNER_MARKER = 1,
	MEMORY_DEP_OWNER_NON_MARKER = 2
};
469 
computeMemoryDepBarrier(const MemoryDepParams & params,MemoryDepOwner owner,VkAccessFlags * memoryDepAccess,VkPipelineStageFlags * executionScope)470 void computeMemoryDepBarrier(const MemoryDepParams&			params,
471 							 MemoryDepOwner					owner,
472 							 VkAccessFlags*					memoryDepAccess,
473 							 VkPipelineStageFlags*			executionScope)
474 {
475 	DE_ASSERT(owner != MEMORY_DEP_OWNER_NOBODY);
476 
477 	if (owner == MEMORY_DEP_OWNER_MARKER)
478 	{
479 		*memoryDepAccess = VK_ACCESS_TRANSFER_WRITE_BIT;
480 		*executionScope  = params.base.stage | VK_PIPELINE_STAGE_TRANSFER_BIT;
481 	}
482 	else
483 	{
484 		if (params.method == MEMORY_DEP_COPY)
485 		{
486 			*memoryDepAccess = VK_ACCESS_TRANSFER_WRITE_BIT;
487 			*executionScope  = VK_PIPELINE_STAGE_TRANSFER_BIT;
488 		}
489 		else if (params.method == MEMORY_DEP_DISPATCH)
490 		{
491 			*memoryDepAccess = VK_ACCESS_SHADER_WRITE_BIT;
492 			*executionScope  = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
493 		}
494 		else
495 		{
496 			*memoryDepAccess = VK_ACCESS_SHADER_WRITE_BIT;
497 			*executionScope  = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
498 		}
499 	}
500 }
501 
502 // Randomly do buffer marker writes and other operations (draws, dispatches) that shader-write to a shared buffer.  Insert pipeline barriers
503 // when necessary and make sure that the synchronization between marker writes and non-marker writes are correctly handled by the barriers.
bufferMarkerMemoryDep(Context & context,MemoryDepParams params)504 tcu::TestStatus bufferMarkerMemoryDep(Context& context, MemoryDepParams params)
505 {
506 	WorkingDevice wd;
507 
508 	createDeviceWithExtension(context, wd, params.base.testQueue, params.base.useHostPtr, params.base.offset);
509 
510 	VkBufferUsageFlags usageFlags = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
511 
512 	if ((params.method == MEMORY_DEP_DRAW) || (params.method == MEMORY_DEP_DISPATCH))
513 		usageFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
514 	else
515 		usageFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
516 
517 	const deUint32					numIters(1000);
518 	const DeviceInterface& vk(*wd.deviceDriver);
519 	const VkDevice					device(*wd.logicalDevice);
520 	const deUint32					size(params.base.size);
521 	const VkDeviceSize				markerBufferSize(params.base.size * sizeof(deUint32));
522 	VkExternalMemoryBufferCreateInfo	externalMemoryBufferCreateInfo =
523 	{
524 		VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
525 		DE_NULL,
526 		VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT
527 	};
528 	VkBufferCreateInfo				bufferCreateInfo = makeBufferCreateInfo(markerBufferSize, usageFlags);
529 	if (params.base.useHostPtr)
530 		bufferCreateInfo.pNext = &externalMemoryBufferCreateInfo;
531 	Move<VkBuffer>					markerBuffer(createBuffer(vk, device, &bufferCreateInfo));
532 	MovePtr<ExternalHostMemory>		hostMemory;
533 	MovePtr<Allocation>				markerMemory;
534 
535 	createMarkerBufferMemory(context.getInstanceInterface(), vk, context.getPhysicalDevice(), device,
536 		*markerBuffer, params.base.offset, wd.allocator, MemoryRequirement::HostVisible, params.base.useHostPtr, hostMemory, markerMemory);
537 
538 	de::Random						rng(size ^ params.base.size);
539 	std::vector<deUint32>			expected(params.base.size, 0);
540 
541 	Move<VkDescriptorPool>			descriptorPool;
542 	Move<VkDescriptorSetLayout>		descriptorSetLayout;
543 	Move<VkDescriptorSet>			descriptorSet;
544 	Move<VkPipelineLayout>			pipelineLayout;
545 	VkShaderStageFlags				pushConstantStage = 0;
546 
547 	if ((params.method == MEMORY_DEP_DRAW) || (params.method == MEMORY_DEP_DISPATCH))
548 	{
549 		DescriptorPoolBuilder descriptorPoolBuilder;
550 
551 		descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u);
552 		descriptorPool = descriptorPoolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
553 
554 		DescriptorSetLayoutBuilder setLayoutBuilder;
555 
556 		setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
557 		descriptorSetLayout = setLayoutBuilder.build(vk, device);
558 
559 		const VkDescriptorSetAllocateInfo descriptorSetAllocateInfo =
560 		{
561 			VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,		// VkStructureType				sType;
562 			DE_NULL,											// const void*					pNext;
563 			*descriptorPool,									// VkDescriptorPool				descriptorPool;
564 			1u,													// deUint32						setLayoutCount;
565 			&descriptorSetLayout.get()						// const VkDescriptorSetLayout*	pSetLayouts;
566 		};
567 
568 		descriptorSet = allocateDescriptorSet(vk, device, &descriptorSetAllocateInfo);
569 
570 		VkDescriptorBufferInfo markerBufferInfo = { *markerBuffer, 0, VK_WHOLE_SIZE };
571 
572 		VkWriteDescriptorSet writeSet[] =
573 		{
574 			{
575 				VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,	// VkStructureType                  sType;
576 				DE_NULL,								// const void*                      pNext;
577 				descriptorSet.get(),					// VkDescriptorSet                  dstSet;
578 				0,										// uint32_t                         dstBinding;
579 				0,										// uint32_t                         dstArrayElement;
580 				1,										// uint32_t                         descriptorCount;
581 				VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,		// VkDescriptorType                 descriptorType;
582 				DE_NULL,								// const VkDescriptorImageInfo*     pImageInfo;
583 				&markerBufferInfo,						// const VkDescriptorBufferInfo*    pBufferInfo;
584 				DE_NULL									// const VkBufferView*              pTexelBufferViev
585 			}
586 		};
587 
588 		vk.updateDescriptorSets(device, DE_LENGTH_OF_ARRAY(writeSet), writeSet, 0, DE_NULL);
589 
590 		VkDescriptorSetLayout setLayout = descriptorSetLayout.get();
591 
592 		pushConstantStage = (params.method == MEMORY_DEP_DISPATCH ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_FRAGMENT_BIT);
593 
594 		const VkPushConstantRange pushConstantRange =
595 		{
596 			pushConstantStage,	// VkShaderStageFlags    stageFlags;
597 			0u,					// uint32_t              offset;
598 			2 * sizeof(deUint32),	// uint32_t              size;
599 		};
600 
601 		const VkPipelineLayoutCreateInfo pipelineLayoutInfo =
602 		{
603 			VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,		// VkStructureType				sType;
604 			DE_NULL,											// const void*					pNext;
605 			(VkPipelineLayoutCreateFlags)0,						// VkPipelineLayoutCreateFlags	flags;
606 			1u,													// deUint32						setLayoutCount;
607 			&setLayout,											// const VkDescriptorSetLayout*	pSetLayouts;
608 			1u,													// deUint32						pushConstantRangeCount;
609 			&pushConstantRange,									// const VkPushConstantRange*	pPushConstantRanges;
610 		};
611 
612 		pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutInfo);
613 	}
614 
615 	Move<VkRenderPass>		renderPass;
616 	Move<VkFramebuffer>		fbo;
617 	Move<VkPipeline>		pipeline;
618 	Move<VkShaderModule>	vertexModule;
619 	Move<VkShaderModule>	fragmentModule;
620 	Move<VkShaderModule>	computeModule;
621 
622 	if (params.method == MEMORY_DEP_DRAW)
623 	{
624 		const VkSubpassDescription subpassInfo =
625 		{
626 			0,									// VkSubpassDescriptionFlags       flags;
627 			VK_PIPELINE_BIND_POINT_GRAPHICS,	// VkPipelineBindPoint             pipelineBindPoint;
628 			0,									// uint32_t                        inputAttachmentCount;
629 			DE_NULL,							// const VkAttachmentReference*    pInputAttachments;
630 			0,									// uint32_t                        colorAttachmentCount;
631 			DE_NULL,							// const VkAttachmentReference*    pColorAttachments;
632 			0,									// const VkAttachmentReference*    pResolveAttachments;
633 			DE_NULL,							// const VkAttachmentReference*    pDepthStencilAttachment;
634 			0,									// uint32_t                        preserveAttachmentCount;
635 			DE_NULL								// const uint32_t*                 pPreserveAttachments;
636 		};
637 
638 		const VkRenderPassCreateInfo renderPassInfo =
639 		{
640 			VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,	// VkStructureType                   sType;
641 			DE_NULL,									// const void*                       pNext;
642 			0,											// VkRenderPassCreateFlags           flags;
643 			0,											// uint32_t                          attachmentCount;
644 			DE_NULL,									// const VkAttachmentDescription*    pAttachments;
645 			1,											// uint32_t                          subpassCount;
646 			&subpassInfo,								// const VkSubpassDescription*       pSubpasses;
647 			0u,											// uint32_t                          dependencyCount;
648 			DE_NULL										// const VkSubpassDependency*        pDependencies
649 		};
650 
651 		renderPass = createRenderPass(vk, device, &renderPassInfo);
652 
653 		const VkFramebufferCreateInfo framebufferInfo =
654 		{
655 			VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,	// VkStructureType             sType;
656 			DE_NULL,									// const void*                 pNext;
657 			0,											// VkFramebufferCreateFlags    flags;
658 			renderPass.get(),							// VkRenderPass                renderPass;
659 			0,											// uint32_t                    attachmentCount;
660 			DE_NULL,									// const VkImageView*          pAttachments;
661 			1,											// uint32_t                    width;
662 			1,											// uint32_t                    height;
663 			1,											// uint32_t                    layers;
664 		};
665 
666 		fbo = createFramebuffer(vk, device, &framebufferInfo);
667 
668 		vertexModule   = createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u);
669 		fragmentModule = createShaderModule(vk, device, context.getBinaryCollection().get("frag"), 0u);
670 
671 		const VkPipelineVertexInputStateCreateInfo vertexInputStateInfo =
672 		{
673 			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,		// VkStructureType							sType;
674 			DE_NULL,														// const void*								pNext;
675 			(VkPipelineVertexInputStateCreateFlags)0,						// VkPipelineVertexInputStateCreateFlags	flags;
676 			0,																// uint32_t									vertexBindingDescriptionCount;
677 			DE_NULL,														// const VkVertexInputBindingDescription*	pVertexBindingDescriptions;
678 			0,																// uint32_t									vertexAttributeDescriptionCount;
679 			DE_NULL,														// const VkVertexInputAttributeDescription*	pVertexAttributeDescriptions;
680 		};
681 
682 		const VkPipelineInputAssemblyStateCreateInfo pipelineInputAssemblyStateInfo =
683 		{
684 			VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,	// VkStructureType							sType;
685 			DE_NULL,														// const void*								pNext;
686 			(VkPipelineInputAssemblyStateCreateFlags)0,						// VkPipelineInputAssemblyStateCreateFlags	flags;
687 			VK_PRIMITIVE_TOPOLOGY_POINT_LIST,								// VkPrimitiveTopology						topology;
688 			VK_FALSE,														// VkBool32									primitiveRestartEnable;
689 		};
690 
691 		std::vector<VkPipelineShaderStageCreateInfo> shaderStages;
692 
693 		{
694 			const VkPipelineShaderStageCreateInfo createInfo =
695 			{
696 				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType						sType;
697 				DE_NULL,												// const void*							pNext;
698 				(VkPipelineShaderStageCreateFlags)0,					// VkPipelineShaderStageCreateFlags		flags;
699 				VK_SHADER_STAGE_VERTEX_BIT,								// VkShaderStageFlagBits				stage;
700 				vertexModule.get(),										// VkShaderModule						module;
701 				"main",													// const char*							pName;
702 				DE_NULL,												// const VkSpecializationInfo*			pSpecializationInfo;
703 			};
704 
705 			shaderStages.push_back(createInfo);
706 		}
707 
708 		{
709 			const VkPipelineShaderStageCreateInfo createInfo =
710 			{
711 				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType						sType;
712 				DE_NULL,												// const void*							pNext;
713 				(VkPipelineShaderStageCreateFlags)0,					// VkPipelineShaderStageCreateFlags		flags;
714 				VK_SHADER_STAGE_FRAGMENT_BIT,							// VkShaderStageFlagBits				stage;
715 				fragmentModule.get(),									// VkShaderModule						module;
716 				"main",													// const char*							pName;
717 				DE_NULL,												// const VkSpecializationInfo*			pSpecializationInfo;
718 			};
719 
720 			shaderStages.push_back(createInfo);
721 		}
722 
723 		VkViewport viewport;
724 
725 		viewport.x			= 0;
726 		viewport.y			= 0;
727 		viewport.width		= 1;
728 		viewport.height		= 1;
729 		viewport.minDepth	= 0.0f;
730 		viewport.maxDepth	= 1.0f;
731 
732 		VkRect2D scissor;
733 
734 		scissor.offset.x		= 0;
735 		scissor.offset.y		= 0;
736 		scissor.extent.width	= 1;
737 		scissor.extent.height	= 1;
738 
739 		const VkPipelineViewportStateCreateInfo pipelineViewportStateInfo =
740 		{
741 			VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,			// VkStructureType						sType;
742 			DE_NULL,														// const void*							pNext;
743 			(VkPipelineViewportStateCreateFlags)0,							// VkPipelineViewportStateCreateFlags	flags;
744 			1u,																// uint32_t								viewportCount;
745 			&viewport,														// const VkViewport*					pViewports;
746 			1u,																// uint32_t								scissorCount;
747 			&scissor,														// const VkRect2D*						pScissors;
748 		};
749 
750 		const VkPipelineRasterizationStateCreateInfo pipelineRasterizationStateInfo =
751 		{
752 			VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,	// VkStructureType							sType;
753 			DE_NULL,													// const void*								pNext;
754 			(VkPipelineRasterizationStateCreateFlags)0,					// VkPipelineRasterizationStateCreateFlags	flags;
755 			VK_FALSE,													// VkBool32									depthClampEnable;
756 			VK_FALSE,													// VkBool32									rasterizerDiscardEnable;
757 			VK_POLYGON_MODE_FILL,										// VkPolygonMode							polygonMode;
758 			VK_CULL_MODE_NONE,											// VkCullModeFlags							cullMode;
759 			VK_FRONT_FACE_COUNTER_CLOCKWISE,							// VkFrontFace								frontFace;
760 			VK_FALSE,													// VkBool32									depthBiasEnable;
761 			0.0f,														// float									depthBiasConstantFactor;
762 			0.0f,														// float									depthBiasClamp;
763 			0.0f,														// float									depthBiasSlopeFactor;
764 			1.0f,														// float									lineWidth;
765 		};
766 
767 		const VkPipelineMultisampleStateCreateInfo pipelineMultisampleStateInfo =
768 		{
769 
770 			VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,	// VkStructureType							sType;
771 			DE_NULL,													// const void*								pNext;
772 			(VkPipelineMultisampleStateCreateFlags)0,					// VkPipelineMultisampleStateCreateFlags	flags;
773 			VK_SAMPLE_COUNT_1_BIT,										// VkSampleCountFlagBits					rasterizationSamples;
774 			VK_FALSE,													// VkBool32									sampleShadingEnable;
775 			1.0f,														// float									minSampleShading;
776 			DE_NULL,													// const VkSampleMask*						pSampleMask;
777 			VK_FALSE,													// VkBool32									alphaToCoverageEnable;
778 			VK_FALSE,													// VkBool32									alphaToOneEnable;
779 		};
780 
781 		const VkStencilOpState						noStencilOp				=
782 		{
783 			VK_STENCIL_OP_KEEP,		// VkStencilOp    failOp
784 			VK_STENCIL_OP_KEEP,		// VkStencilOp    passOp
785 			VK_STENCIL_OP_KEEP,		// VkStencilOp    depthFailOp
786 			VK_COMPARE_OP_NEVER,	// VkCompareOp    compareOp
787 			0,						// deUint32       compareMask
788 			0,						// deUint32       writeMask
789 			0						// deUint32       reference
790 		};
791 
792 		VkPipelineDepthStencilStateCreateInfo pipelineDepthStencilStateInfo =
793 		{
794 			VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,	// VkStructureType							sType;
795 			DE_NULL,													// const void*								pNext;
796 			(VkPipelineDepthStencilStateCreateFlags)0,					// VkPipelineDepthStencilStateCreateFlags	flags;
797 			VK_FALSE,													// VkBool32									depthTestEnable;
798 			VK_FALSE,													// VkBool32									depthWriteEnable;
799 			VK_COMPARE_OP_ALWAYS,										// VkCompareOp								depthCompareOp;
800 			VK_FALSE,													// VkBool32									depthBoundsTestEnable;
801 			VK_FALSE,													// VkBool32									stencilTestEnable;
802 			noStencilOp,												// VkStencilOpState							front;
803 			noStencilOp,												// VkStencilOpState							back;
804 			0.0f,														// float									minDepthBounds;
805 			1.0f,														// float									maxDepthBounds;
806 		};
807 
808 		const VkPipelineColorBlendStateCreateInfo pipelineColorBlendStateInfo =
809 		{
810 			VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,	// VkStructureType								sType;
811 			DE_NULL,													// const void*									pNext;
812 			(VkPipelineColorBlendStateCreateFlags)0,					// VkPipelineColorBlendStateCreateFlags			flags;
813 			VK_FALSE,													// VkBool32										logicOpEnable;
814 			VK_LOGIC_OP_COPY,											// VkLogicOp									logicOp;
815 			0,															// deUint32										attachmentCount;
816 			DE_NULL,													// const VkPipelineColorBlendAttachmentState*	pAttachments;
817 			{ 0.0f, 0.0f, 0.0f, 0.0f },									// float										blendConstants[4];
818 		};
819 
820 		const VkGraphicsPipelineCreateInfo	graphicsPipelineInfo =
821 		{
822 			VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,			// VkStructureType									sType;
823 			DE_NULL,													// const void*										pNext;
824 			(VkPipelineCreateFlags)0,									// VkPipelineCreateFlags							flags;
825 			static_cast<deUint32>(shaderStages.size()),					// deUint32											stageCount;
826 			de::dataOrNull(shaderStages),								// const VkPipelineShaderStageCreateInfo*			pStages;
827 			&vertexInputStateInfo,										// const VkPipelineVertexInputStateCreateInfo*		pVertexInputState;
828 			&pipelineInputAssemblyStateInfo,							// const VkPipelineInputAssemblyStateCreateInfo*	pInputAssemblyState;
829 			DE_NULL,													// const VkPipelineTessellationStateCreateInfo*		pTessellationState;
830 			&pipelineViewportStateInfo,									// const VkPipelineViewportStateCreateInfo*			pViewportState;
831 			&pipelineRasterizationStateInfo,							// const VkPipelineRasterizationStateCreateInfo*	pRasterizationState;
832 			&pipelineMultisampleStateInfo,								// const VkPipelineMultisampleStateCreateInfo*		pMultisampleState;
833 			&pipelineDepthStencilStateInfo,								// const VkPipelineDepthStencilStateCreateInfo*		pDepthStencilState;
834 			&pipelineColorBlendStateInfo,								// const VkPipelineColorBlendStateCreateInfo*		pColorBlendState;
835 			DE_NULL,													// const VkPipelineDynamicStateCreateInfo*			pDynamicState;
836 			pipelineLayout.get(),										// VkPipelineLayout									layout;
837 			renderPass.get(),											// VkRenderPass										renderPass;
838 			0,															// deUint32											subpass;
839 			DE_NULL,													// VkPipeline										basePipelineHandle;
840 			0,															// deInt32											basePipelineIndex;
841 		};
842 
843 		pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineInfo);
844 	}
845 	else if (params.method == MEMORY_DEP_DISPATCH)
846 	{
847 		computeModule = createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0u);
848 
849 		const VkPipelineShaderStageCreateInfo shaderStageInfo =
850 		{
851 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType						sType;
852 			DE_NULL,												// const void*							pNext;
853 			(VkPipelineShaderStageCreateFlags)0,					// VkPipelineShaderStageCreateFlags		flags;
854 			VK_SHADER_STAGE_COMPUTE_BIT,							// VkShaderStageFlagBits				stage;
855 			computeModule.get(),									// VkShaderModule						module;
856 			"main",													// const char*							pName;
857 			DE_NULL													// const VkSpecializationInfo*			pSpecializationInfo;
858 		};
859 
860 		const VkComputePipelineCreateInfo computePipelineInfo =
861 		{
862 			VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType                    sType;
863 			DE_NULL,										// const void*                        pNext;
864 			0u,												// VkPipelineCreateFlags              flags;
865 			shaderStageInfo,								// VkPipelineShaderStageCreateInfo    stage;
866 			pipelineLayout.get(),							// VkPipelineLayout                   layout;
867 			DE_NULL,										// VkPipeline                         basePipelineHandle;
868 			0												// int32_t                            basePipelineIndex;
869 		};
870 
871 		pipeline = createComputePipeline(vk, device, DE_NULL, &computePipelineInfo);
872 	}
873 
874 	if (params.base.useHostPtr)
875 	{
876 		writeHostMemory(vk, device, markerMemory->getMemory(), static_cast<size_t>(markerBufferSize), hostMemory->size, &expected[0]);
877 	}
878 	else
879 	{
880 		deMemcpy(markerMemory->getHostPtr(), &expected[0], static_cast<size_t>(markerBufferSize));
881 		flushMappedMemoryRange(vk, device, markerMemory->getMemory(), markerMemory->getOffset(), VK_WHOLE_SIZE);
882 	}
883 
884 	const Unique<VkCommandPool>		cmdPool(createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, wd.queueFamilyIdx));
885 	const Unique<VkCommandBuffer>	cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
886 
887 	beginCommandBuffer(vk, *cmdBuffer);
888 
889 	VkDescriptorSet setHandle = *descriptorSet;
890 
891 	std::vector<MemoryDepOwner>	dataOwner(size, MEMORY_DEP_OWNER_NOBODY);
892 
893 	if (params.method == MEMORY_DEP_DRAW)
894 	{
895 		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
896 		vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0, 1, &setHandle, 0, DE_NULL);
897 	}
898 	else if (params.method == MEMORY_DEP_DISPATCH)
899 	{
900 		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
901 		vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0, 1, &setHandle, 0, DE_NULL);
902 	}
903 
904 	if (params.base.useHostPtr)
905 	{
906 		writeHostMemory(vk, device, markerMemory->getMemory(), static_cast<size_t>(markerBufferSize), hostMemory->size, &expected[0]);
907 	}
908 	else
909 	{
910 		deMemcpy(markerMemory->getHostPtr(), &expected[0], static_cast<size_t>(markerBufferSize));
911 		flushMappedMemoryRange(vk, device, markerMemory->getMemory(), markerMemory->getOffset(), VK_WHOLE_SIZE);
912 	}
913 
914 	deUint32 writeStages = 0;
915 	deUint32 writeAccess = 0;
916 
917 	for (deUint32 i = 0; i < numIters; ++i)
918 	{
919 		deUint32		slot		= rng.getUint32() % size;
920 		MemoryDepOwner	oldOwner	= dataOwner[slot];
921 		MemoryDepOwner	newOwner	= static_cast<MemoryDepOwner>(1 + (rng.getUint32() % 2));
922 
923 		DE_ASSERT(newOwner == MEMORY_DEP_OWNER_MARKER || newOwner == MEMORY_DEP_OWNER_NON_MARKER);
924 		DE_ASSERT(slot < size);
925 
926 		if ((oldOwner != newOwner && oldOwner != MEMORY_DEP_OWNER_NOBODY) ||
927 			(oldOwner == MEMORY_DEP_OWNER_NON_MARKER && newOwner == MEMORY_DEP_OWNER_NON_MARKER))
928 		{
929 			VkBufferMemoryBarrier memoryDep =
930 			{
931 				VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,    // VkStructureType    sType;
932 				DE_NULL,                                    // const void*        pNext;
933 				0,                                          // VkAccessFlags      srcAccessMask;
934 				0,                                          // VkAccessFlags      dstAccessMask;
935 				wd.queueFamilyIdx,                          // uint32_t           srcQueueFamilyIndex;
936 				wd.queueFamilyIdx,                          // uint32_t           dstQueueFamilyIndex;
937 				*markerBuffer,                              // VkBuffer           buffer;
938 				sizeof(deUint32) * slot,                    // VkDeviceSize       offset;
939 				sizeof(deUint32)                            // VkDeviceSize       size;
940 			};
941 
942 			VkPipelineStageFlags srcStageMask;
943 			VkPipelineStageFlags dstStageMask;
944 
945 			computeMemoryDepBarrier(params, oldOwner, &memoryDep.srcAccessMask, &srcStageMask);
946 			computeMemoryDepBarrier(params, newOwner, &memoryDep.dstAccessMask, &dstStageMask);
947 
948 			vk.cmdPipelineBarrier(*cmdBuffer, srcStageMask, dstStageMask, 0, 0, DE_NULL, 1, &memoryDep, 0, DE_NULL);
949 		}
950 
951 		if (params.method == MEMORY_DEP_DRAW)
952 		{
953 			const VkRenderPassBeginInfo beginInfo =
954 			{
955 				VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,	// VkStructureType        sType;
956 				DE_NULL,									// const void*            pNext;
957 				renderPass.get(),							// VkRenderPass           renderPass;
958 				fbo.get(),									// VkFramebuffer          framebuffer;
959 				{ { 0, 0, }, { 1, 1 } },					// VkRect2D               renderArea;
960 				0,											// uint32_t               clearValueCount;
961 				DE_NULL										// const VkClearValue*    pClearValues;
962 			};
963 
964 			vk.cmdBeginRenderPass(*cmdBuffer, &beginInfo, VK_SUBPASS_CONTENTS_INLINE);
965 		}
966 
967 		const deUint32 value = i;
968 
969 		if (newOwner == MEMORY_DEP_OWNER_MARKER)
970 		{
971 			vk.cmdWriteBufferMarkerAMD(*cmdBuffer, params.base.stage, *markerBuffer, sizeof(deUint32) * slot, value);
972 
973 			writeStages |= VK_PIPELINE_STAGE_TRANSFER_BIT;
974 			writeAccess |= VK_ACCESS_TRANSFER_WRITE_BIT;
975 		}
976 		else
977 		{
978 			DE_ASSERT(newOwner == MEMORY_DEP_OWNER_NON_MARKER);
979 
980 			if (params.method == MEMORY_DEP_COPY)
981 			{
982 				vk.cmdUpdateBuffer(*cmdBuffer, *markerBuffer, sizeof(deUint32) * slot, sizeof(deUint32), &value);
983 
984 				writeStages |= VK_PIPELINE_STAGE_TRANSFER_BIT;
985 				writeAccess |= VK_ACCESS_TRANSFER_WRITE_BIT;
986 			}
987 			else if (params.method == MEMORY_DEP_DRAW)
988 			{
989 				const deUint32 pushConst[] = { slot, value };
990 
991 				vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, pushConstantStage, 0, sizeof(pushConst), pushConst);
992 				vk.cmdDraw(*cmdBuffer, 1, 1, i, 0);
993 
994 				writeStages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
995 				writeAccess |= VK_ACCESS_SHADER_WRITE_BIT;
996 			}
997 			else
998 			{
999 				const deUint32 pushConst[] = { slot, value };
1000 
1001 				vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, pushConstantStage, 0, sizeof(pushConst), pushConst);
1002 				vk.cmdDispatch(*cmdBuffer, 1, 1, 1);
1003 
1004 				writeStages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
1005 				writeAccess |= VK_ACCESS_SHADER_WRITE_BIT;
1006 			}
1007 		}
1008 
1009 		dataOwner[slot] = newOwner;
1010 		expected[slot]  = value;
1011 
1012 		if (params.method == MEMORY_DEP_DRAW)
1013 		{
1014 			vk.cmdEndRenderPass(*cmdBuffer);
1015 		}
1016 	}
1017 
1018 	const VkMemoryBarrier memoryDep =
1019 	{
1020 		VK_STRUCTURE_TYPE_MEMORY_BARRIER,
1021 		DE_NULL,
1022 		writeAccess,
1023 		VK_ACCESS_HOST_READ_BIT,
1024 	};
1025 
1026 	vk.cmdPipelineBarrier(*cmdBuffer, writeStages, VK_PIPELINE_STAGE_HOST_BIT, 0, 1, &memoryDep, 0, DE_NULL, 0, DE_NULL);
1027 
1028 	VK_CHECK(vk.endCommandBuffer(*cmdBuffer));
1029 
1030 	submitCommandsAndWait(vk, device, wd.queue, *cmdBuffer);
1031 
1032 	if (!checkMarkerBuffer(vk, device, markerMemory, expected, params.base.useHostPtr ? hostMemory->size : 0, params.base.useHostPtr))
1033 		return tcu::TestStatus::fail("Some marker values were incorrect");
1034 
1035 	return tcu::TestStatus::pass("Pass");
1036 }
1037 
initMemoryDepPrograms(SourceCollections & programCollection,const MemoryDepParams params)1038 void initMemoryDepPrograms(SourceCollections& programCollection, const MemoryDepParams params)
1039 {
1040 	if (params.method == MEMORY_DEP_DRAW)
1041 	{
1042 		{
1043 			std::ostringstream src;
1044 
1045             src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1046                 << "layout(location = 0) flat out uint offset;\n"
1047                 << "out gl_PerVertex { vec4 gl_Position; float gl_PointSize; };\n"
1048 				<< "void main() {\n"
1049 				<< "	offset = gl_VertexIndex;\n"
1050 				<< "	gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
1051 				<< "	gl_PointSize = 1.0f;\n"
1052 				<< "}\n";
1053 
1054 			programCollection.glslSources.add("vert") << glu::VertexSource(src.str());
1055 		}
1056 
1057 		{
1058 			std::ostringstream src;
1059 
1060 			src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1061 				<< "layout(push_constant) uniform Constants { uvec2 params; } pc;\n"
1062 				<< "layout(std430, set = 0, binding = 0) buffer Data { uint elems[]; } data;\n"
1063 				<< "layout(location = 0) flat in uint offset;\n"
1064 				<< "void main() {\n"
1065 				<< "	data.elems[pc.params.x] = pc.params.y;\n"
1066 				<< "}\n";
1067 
1068 			programCollection.glslSources.add("frag") << glu::FragmentSource(src.str());
1069 		}
1070 	}
1071 	else if (params.method == MEMORY_DEP_DISPATCH)
1072 	{
1073 		{
1074 			std::ostringstream src;
1075 
1076 			src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1077 				<< "layout(local_size_x = 1u, local_size_y = 1u, local_size_z = 1u) in;\n"
1078 				<< "layout(push_constant) uniform Constants { uvec2 params; } pc;\n"
1079 				<< "layout(std430, set = 0, binding = 0) buffer Data { uint elems[]; } data;\n"
1080 				<< "void main() {\n"
1081 				<< "	data.elems[pc.params.x] = pc.params.y;\n"
1082 				<< "}\n";
1083 
1084 			programCollection.glslSources.add("comp") << glu::ComputeSource(src.str());
1085 		}
1086 	}
1087 }
1088 
checkBufferMarkerSupport(Context & context,BaseTestParams params)1089 void checkBufferMarkerSupport (Context& context, BaseTestParams params)
1090 {
1091 	if (params.useHostPtr)
1092 		context.requireDeviceFunctionality("VK_EXT_external_memory_host");
1093 
1094 	context.requireDeviceFunctionality("VK_AMD_buffer_marker");
1095 }
1096 
checkBufferMarkerSupport(Context & context,MemoryDepParams params)1097 void checkBufferMarkerSupport (Context& context, MemoryDepParams params)
1098 {
1099 	if (params.base.useHostPtr)
1100 		context.requireDeviceFunctionality("VK_EXT_external_memory_host");
1101 
1102 	context.requireDeviceFunctionality("VK_AMD_buffer_marker");
1103 }
1104 
//! Builds a test case name, appending "_offset_<N>" for non-zero offsets.
//! An offset of 0 keeps the unadorned base name.
//! Note: base is now taken by const reference to avoid a needless copy on
//! every call; all existing call sites (string literals) remain valid.
std::string getTestCaseName(const std::string& base, size_t offset)
{
	if (offset == 0)
		return base;

	return base + "_offset_" + std::to_string(offset);
}
1111 
//! Builds the VK_AMD_buffer_marker test hierarchy:
//!   buffer_marker.<queue>.<memory>.<stage>.{sequential|overwrite|memory_dep}.*
//! 'base' is mutated as the loops progress and copied by value into each test
//! case, so the field values at each addFunctionCase call are what get stored.
tcu::TestCaseGroup* createBufferMarkerTestsInGroup(tcu::TestContext& testCtx)
{
	// AMD_buffer_marker Tests
	tcu::TestCaseGroup* root = (new tcu::TestCaseGroup(testCtx, "buffer_marker"));

	// One sub-tree per queue capability the test queue must expose.
	VkQueueFlagBits queues[] = { VK_QUEUE_GRAPHICS_BIT, VK_QUEUE_COMPUTE_BIT, VK_QUEUE_TRANSFER_BIT };
	const char* queueNames[] = { "graphics", "compute", "transfer" };

	BaseTestParams base;
	deMemset(&base, 0, sizeof(base));

	for (size_t queueNdx = 0; queueNdx < DE_LENGTH_OF_ARRAY(queues); ++queueNdx)
	{
		tcu::TestCaseGroup* queueGroup = (new tcu::TestCaseGroup(testCtx, queueNames[queueNdx], "Buffer marker tests for a specific queue family"));

		// Marker buffer backing: imported host memory (useHostPtr == true) vs.
		// regular device-allocated memory.
		const char* memoryNames[] = { "external_host_mem", "default_mem" };
		const bool memoryTypes[] = { true, false };

		base.testQueue = queues[queueNdx];

		for (size_t memNdx = 0; memNdx < DE_LENGTH_OF_ARRAY(memoryTypes); ++memNdx)
		{
			tcu::TestCaseGroup* memoryGroup = (new tcu::TestCaseGroup(testCtx, memoryNames[memNdx], "Buffer marker tests for different kinds of backing memory"));

			base.useHostPtr = memoryTypes[memNdx];

			// Pipeline stage passed to vkCmdWriteBufferMarkerAMD.
			VkPipelineStageFlagBits stages[] = { VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT };
			const char* stageNames[] = { "top_of_pipe", "bottom_of_pipe" };

			for (size_t stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(stages); ++stageNdx)
			{
				tcu::TestCaseGroup* stageGroup = (new tcu::TestCaseGroup(testCtx, stageNames[stageNdx], "Buffer marker tests for a specific pipeline stage"));

				base.stage = stages[stageNdx];

				{
					tcu::TestCaseGroup* sequentialGroup = (new tcu::TestCaseGroup(testCtx, "sequential", "Buffer marker tests for sequentially writing"));

					base.size = 4;
					base.offset = 0;

					// Writes 4 sequential marker values into a buffer
					addFunctionCase(sequentialGroup, "4", checkBufferMarkerSupport, bufferMarkerSequential, base);

					base.size = 64;
					base.offset = 0;

					// Writes 64 sequential marker values into a buffer
					addFunctionCase(sequentialGroup, "64", checkBufferMarkerSupport, bufferMarkerSequential, base);

					base.offset = 16;

					// Writes 64 sequential marker values into a buffer offset by 16
					addFunctionCase(sequentialGroup, getTestCaseName("64", base.offset), checkBufferMarkerSupport, bufferMarkerSequential, base);

					base.size = 65536;
					base.offset = 0;

					// Writes 65536 sequential marker values into a buffer
					addFunctionCase(sequentialGroup, "65536", checkBufferMarkerSupport, bufferMarkerSequential, base);

					base.offset = 1024;

					// Writes 65536 sequential marker values into a buffer offset by 1024
					addFunctionCase(sequentialGroup, getTestCaseName("65536", base.offset), checkBufferMarkerSupport, bufferMarkerSequential, base);

					// Reset the offset so later groups start from 0 again.
					base.offset = 0;
					stageGroup->addChild(sequentialGroup);
				}

				{
					tcu::TestCaseGroup* overwriteGroup = (new tcu::TestCaseGroup(testCtx, "overwrite", "Buffer marker tests for overwriting values with implicit synchronization"));

					base.size = 1;

					// Randomly overwrites marker values to a 1-size buffer
					addFunctionCase(overwriteGroup, "1", checkBufferMarkerSupport, bufferMarkerOverwrite, base);

					base.size = 4;

					// Randomly overwrites marker values to a 4-size buffer
					addFunctionCase(overwriteGroup, "4", checkBufferMarkerSupport, bufferMarkerOverwrite, base);

					base.size = 64;

					// Randomly overwrites markers values to a 64-size buffer
					addFunctionCase(overwriteGroup, "64", checkBufferMarkerSupport, bufferMarkerOverwrite, base);
					base.offset = 24;

					// Randomly overwrites markers values to a 64-size buffer at offset 24
					addFunctionCase(overwriteGroup, getTestCaseName("64", base.offset), checkBufferMarkerSupport, bufferMarkerOverwrite, base);

					// Reset the offset so the memory_dep group starts from 0.
					base.offset = 0;

					stageGroup->addChild(overwriteGroup);
				}

				{
					tcu::TestCaseGroup* memoryDepGroup = (new tcu::TestCaseGroup(testCtx, "memory_dep", "Buffer marker tests for memory dependencies between marker writes and other operations"));

					MemoryDepParams params;
					size_t offsets[] = { 0, 24 };
					deMemset(&params, 0, sizeof(params));

					for (size_t offsetIdx = 0; offsetIdx < de::arrayLength(offsets); offsetIdx++) {
						params.base		 = base;
						params.base.size = 128;
						params.base.offset = offsets[offsetIdx];

						// Draw-based variants are only added for the graphics queue.
						if (params.base.testQueue == VK_QUEUE_GRAPHICS_BIT)
						{
							params.method = MEMORY_DEP_DRAW;

							// Test memory dependencies between marker writes and draws
							addFunctionCaseWithPrograms(memoryDepGroup, getTestCaseName("draw", params.base.offset), checkBufferMarkerSupport, initMemoryDepPrograms, bufferMarkerMemoryDep, params);
						}

						// Dispatch-based variants are added for every queue except transfer.
						if (params.base.testQueue != VK_QUEUE_TRANSFER_BIT)
						{
							params.method = MEMORY_DEP_DISPATCH;

							// Test memory dependencies between marker writes and compute dispatches
							addFunctionCaseWithPrograms(memoryDepGroup, getTestCaseName("dispatch", params.base.offset), checkBufferMarkerSupport, initMemoryDepPrograms, bufferMarkerMemoryDep, params);
						}

						// Copy-based variants run on every queue type.
						params.method = MEMORY_DEP_COPY;

						// Test memory dependencies between marker writes and buffer copies
						addFunctionCaseWithPrograms(memoryDepGroup, getTestCaseName("buffer_copy", params.base.offset), checkBufferMarkerSupport, initMemoryDepPrograms, bufferMarkerMemoryDep, params);
					}

					stageGroup->addChild(memoryDepGroup);
				}

				memoryGroup->addChild(stageGroup);
			}

			queueGroup->addChild(memoryGroup);
		}

		root->addChild(queueGroup);
	}

	return root;
}
1257 
1258 } // anonymous ns
1259 
createBufferMarkerTests(tcu::TestContext & testCtx)1260 tcu::TestCaseGroup* createBufferMarkerTests (tcu::TestContext& testCtx)
1261 {
1262 	return createBufferMarkerTestsInGroup(testCtx);
1263 }
1264 
1265 } // api
1266 } // vkt
1267