• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Sparse buffer tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktSparseResourcesBufferTests.hpp"
25 #include "vktTestCaseUtil.hpp"
26 #include "vktTestGroupUtil.hpp"
27 #include "vktSparseResourcesTestsUtil.hpp"
28 #include "vktSparseResourcesBase.hpp"
29 #include "vktSparseResourcesBufferSparseBinding.hpp"
30 #include "vktSparseResourcesBufferSparseResidency.hpp"
31 #include "vktSparseResourcesBufferMemoryAliasing.hpp"
32 
33 #include "vkRef.hpp"
34 #include "vkRefUtil.hpp"
35 #include "vkPlatform.hpp"
36 #include "vkPrograms.hpp"
37 #include "vkMemUtil.hpp"
38 #include "vkBuilderUtil.hpp"
39 #include "vkQueryUtil.hpp"
40 #include "vkTypeUtil.hpp"
41 
42 #include "tcuTestLog.hpp"
43 
44 #include "deUniquePtr.hpp"
45 #include "deSharedPtr.hpp"
46 #include "deMath.h"
47 
48 #include <string>
49 #include <vector>
50 #include <map>
51 
52 using namespace vk;
53 using de::MovePtr;
54 using de::UniquePtr;
55 using de::SharedPtr;
56 using tcu::Vec4;
57 using tcu::IVec2;
58 using tcu::IVec4;
59 
60 namespace vkt
61 {
62 namespace sparse
63 {
64 namespace
65 {
66 
67 typedef SharedPtr<UniquePtr<Allocation> > AllocationSp;
68 
//! Rendering dimensions shared by the tests in this file.
enum
{
	RENDER_SIZE	= 128,				//!< size of the framebuffer, in pixels
	GRID_SIZE	= RENDER_SIZE / 8,	//!< how many grid tiles make up one row
};
74 
75 enum TestFlagBits
76 {
77 												//   sparseBinding is implied
78 	TEST_FLAG_ALIASED				= 1u << 0,	//!< sparseResidencyAliased
79 	TEST_FLAG_RESIDENCY				= 1u << 1,	//!< sparseResidencyBuffer
80 	TEST_FLAG_NON_RESIDENT_STRICT	= 1u << 2,	//!< residencyNonResidentStrict
81 	TEST_FLAG_ENABLE_DEVICE_GROUPS	= 1u << 3,	//!< device groups are enabled
82 };
83 typedef deUint32 TestFlags;
84 
85 //! SparseAllocationBuilder output. Owns the allocated memory.
86 struct SparseAllocation
87 {
88 	deUint32							numResourceChunks;
89 	VkDeviceSize						resourceSize;		//!< buffer size in bytes
90 	std::vector<AllocationSp>			allocations;		//!< actual allocated memory
91 	std::vector<VkSparseMemoryBind>		memoryBinds;		//!< memory binds backing the resource
92 };
93 
94 //! Utility to lay out memory allocations for a sparse buffer, including holes and aliased regions.
95 //! Will allocate memory upon building.
96 class SparseAllocationBuilder
97 {
98 public:
99 								SparseAllocationBuilder	(void);
100 
101 	// \note "chunk" is the smallest (due to alignment) bindable amount of memory
102 
103 	SparseAllocationBuilder&	addMemoryHole			(const deUint32 numChunks = 1u);
104 	SparseAllocationBuilder&	addResourceHole			(const deUint32 numChunks = 1u);
105 	SparseAllocationBuilder&	addMemoryBind			(const deUint32 numChunks = 1u);
106 	SparseAllocationBuilder&	addAliasedMemoryBind	(const deUint32 allocationNdx, const deUint32 chunkOffset, const deUint32 numChunks = 1u);
107 	SparseAllocationBuilder&	addMemoryAllocation		(void);
108 
109 	MovePtr<SparseAllocation>	build					(const DeviceInterface&		vk,
110 														 const VkDevice				device,
111 														 Allocator&					allocator,
112 														 VkBufferCreateInfo			referenceCreateInfo,		//!< buffer size is ignored in this info
113 														 const VkDeviceSize			minChunkSize = 0ull) const;	//!< make sure chunks are at least this big
114 
115 private:
116 	struct MemoryBind
117 	{
118 		deUint32	allocationNdx;
119 		deUint32	resourceChunkNdx;
120 		deUint32	memoryChunkNdx;
121 		deUint32	numChunks;
122 	};
123 
124 	deUint32					m_allocationNdx;
125 	deUint32					m_resourceChunkNdx;
126 	deUint32					m_memoryChunkNdx;
127 	std::vector<MemoryBind>		m_memoryBinds;
128 	std::vector<deUint32>		m_chunksPerAllocation;
129 
130 };
131 
SparseAllocationBuilder(void)132 SparseAllocationBuilder::SparseAllocationBuilder (void)
133 	: m_allocationNdx		(0)
134 	, m_resourceChunkNdx	(0)
135 	, m_memoryChunkNdx		(0)
136 {
137 	m_chunksPerAllocation.push_back(0);
138 }
139 
addMemoryHole(const deUint32 numChunks)140 SparseAllocationBuilder& SparseAllocationBuilder::addMemoryHole (const deUint32 numChunks)
141 {
142 	m_memoryChunkNdx						+= numChunks;
143 	m_chunksPerAllocation[m_allocationNdx]	+= numChunks;
144 
145 	return *this;
146 }
147 
addResourceHole(const deUint32 numChunks)148 SparseAllocationBuilder& SparseAllocationBuilder::addResourceHole (const deUint32 numChunks)
149 {
150 	m_resourceChunkNdx += numChunks;
151 
152 	return *this;
153 }
154 
addMemoryAllocation(void)155 SparseAllocationBuilder& SparseAllocationBuilder::addMemoryAllocation (void)
156 {
157 	DE_ASSERT(m_memoryChunkNdx != 0);	// doesn't make sense to have an empty allocation
158 
159 	m_allocationNdx  += 1;
160 	m_memoryChunkNdx  = 0;
161 	m_chunksPerAllocation.push_back(0);
162 
163 	return *this;
164 }
165 
addMemoryBind(const deUint32 numChunks)166 SparseAllocationBuilder& SparseAllocationBuilder::addMemoryBind (const deUint32 numChunks)
167 {
168 	const MemoryBind memoryBind =
169 	{
170 		m_allocationNdx,
171 		m_resourceChunkNdx,
172 		m_memoryChunkNdx,
173 		numChunks
174 	};
175 	m_memoryBinds.push_back(memoryBind);
176 
177 	m_resourceChunkNdx						+= numChunks;
178 	m_memoryChunkNdx						+= numChunks;
179 	m_chunksPerAllocation[m_allocationNdx]	+= numChunks;
180 
181 	return *this;
182 }
183 
addAliasedMemoryBind(const deUint32 allocationNdx,const deUint32 chunkOffset,const deUint32 numChunks)184 SparseAllocationBuilder& SparseAllocationBuilder::addAliasedMemoryBind	(const deUint32 allocationNdx, const deUint32 chunkOffset, const deUint32 numChunks)
185 {
186 	DE_ASSERT(allocationNdx <= m_allocationNdx);
187 
188 	const MemoryBind memoryBind =
189 	{
190 		allocationNdx,
191 		m_resourceChunkNdx,
192 		chunkOffset,
193 		numChunks
194 	};
195 	m_memoryBinds.push_back(memoryBind);
196 
197 	m_resourceChunkNdx += numChunks;
198 
199 	return *this;
200 }
201 
requirementsWithSize(VkMemoryRequirements requirements,const VkDeviceSize size)202 inline VkMemoryRequirements requirementsWithSize (VkMemoryRequirements requirements, const VkDeviceSize size)
203 {
204 	requirements.size = size;
205 	return requirements;
206 }
207 
build(const DeviceInterface & vk,const VkDevice device,Allocator & allocator,VkBufferCreateInfo referenceCreateInfo,const VkDeviceSize minChunkSize) const208 MovePtr<SparseAllocation> SparseAllocationBuilder::build (const DeviceInterface&	vk,
209 														  const VkDevice			device,
210 														  Allocator&				allocator,
211 														  VkBufferCreateInfo		referenceCreateInfo,
212 														  const VkDeviceSize		minChunkSize) const
213 {
214 
215 	MovePtr<SparseAllocation>	sparseAllocation			(new SparseAllocation());
216 
217 								referenceCreateInfo.size	= sizeof(deUint32);
218 	const Unique<VkBuffer>		refBuffer					(createBuffer(vk, device, &referenceCreateInfo));
219 	const VkMemoryRequirements	memoryRequirements			= getBufferMemoryRequirements(vk, device, *refBuffer);
220 	const VkDeviceSize			chunkSize					= std::max(memoryRequirements.alignment, static_cast<VkDeviceSize>(deAlign64(minChunkSize, memoryRequirements.alignment)));
221 
222 	for (std::vector<deUint32>::const_iterator numChunksIter = m_chunksPerAllocation.begin(); numChunksIter != m_chunksPerAllocation.end(); ++numChunksIter)
223 	{
224 		sparseAllocation->allocations.push_back(makeDeSharedPtr(
225 			allocator.allocate(requirementsWithSize(memoryRequirements, *numChunksIter * chunkSize), MemoryRequirement::Any)));
226 	}
227 
228 	for (std::vector<MemoryBind>::const_iterator memBindIter = m_memoryBinds.begin(); memBindIter != m_memoryBinds.end(); ++memBindIter)
229 	{
230 		const Allocation&			alloc	= **sparseAllocation->allocations[memBindIter->allocationNdx];
231 		const VkSparseMemoryBind	bind	=
232 		{
233 			memBindIter->resourceChunkNdx * chunkSize,							// VkDeviceSize               resourceOffset;
234 			memBindIter->numChunks * chunkSize,									// VkDeviceSize               size;
235 			alloc.getMemory(),													// VkDeviceMemory             memory;
236 			alloc.getOffset() + memBindIter->memoryChunkNdx * chunkSize,		// VkDeviceSize               memoryOffset;
237 			(VkSparseMemoryBindFlags)0,											// VkSparseMemoryBindFlags    flags;
238 		};
239 		sparseAllocation->memoryBinds.push_back(bind);
240 		referenceCreateInfo.size = std::max(referenceCreateInfo.size, bind.resourceOffset + bind.size);
241 	}
242 
243 	sparseAllocation->resourceSize		= referenceCreateInfo.size;
244 	sparseAllocation->numResourceChunks = m_resourceChunkNdx;
245 
246 	return sparseAllocation;
247 }
248 
makeImageCreateInfo(const VkFormat format,const IVec2 & size,const VkImageUsageFlags usage)249 VkImageCreateInfo makeImageCreateInfo (const VkFormat format, const IVec2& size, const VkImageUsageFlags usage)
250 {
251 	const VkImageCreateInfo imageParams =
252 	{
253 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,			// VkStructureType			sType;
254 		DE_NULL,										// const void*				pNext;
255 		(VkImageCreateFlags)0,							// VkImageCreateFlags		flags;
256 		VK_IMAGE_TYPE_2D,								// VkImageType				imageType;
257 		format,											// VkFormat					format;
258 		makeExtent3D(size.x(), size.y(), 1),			// VkExtent3D				extent;
259 		1u,												// deUint32					mipLevels;
260 		1u,												// deUint32					arrayLayers;
261 		VK_SAMPLE_COUNT_1_BIT,							// VkSampleCountFlagBits	samples;
262 		VK_IMAGE_TILING_OPTIMAL,						// VkImageTiling			tiling;
263 		usage,											// VkImageUsageFlags		usage;
264 		VK_SHARING_MODE_EXCLUSIVE,						// VkSharingMode			sharingMode;
265 		0u,												// deUint32					queueFamilyIndexCount;
266 		DE_NULL,										// const deUint32*			pQueueFamilyIndices;
267 		VK_IMAGE_LAYOUT_UNDEFINED,						// VkImageLayout			initialLayout;
268 	};
269 	return imageParams;
270 }
271 
makeRenderPass(const DeviceInterface & vk,const VkDevice device,const VkFormat colorFormat)272 Move<VkRenderPass> makeRenderPass (const DeviceInterface&	vk,
273 								   const VkDevice			device,
274 								   const VkFormat			colorFormat)
275 {
276 	const VkAttachmentDescription colorAttachmentDescription =
277 	{
278 		(VkAttachmentDescriptionFlags)0,					// VkAttachmentDescriptionFlags		flags;
279 		colorFormat,										// VkFormat							format;
280 		VK_SAMPLE_COUNT_1_BIT,								// VkSampleCountFlagBits			samples;
281 		VK_ATTACHMENT_LOAD_OP_CLEAR,						// VkAttachmentLoadOp				loadOp;
282 		VK_ATTACHMENT_STORE_OP_STORE,						// VkAttachmentStoreOp				storeOp;
283 		VK_ATTACHMENT_LOAD_OP_DONT_CARE,					// VkAttachmentLoadOp				stencilLoadOp;
284 		VK_ATTACHMENT_STORE_OP_DONT_CARE,					// VkAttachmentStoreOp				stencilStoreOp;
285 		VK_IMAGE_LAYOUT_UNDEFINED,							// VkImageLayout					initialLayout;
286 		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,			// VkImageLayout					finalLayout;
287 	};
288 
289 	const VkAttachmentReference colorAttachmentRef =
290 	{
291 		0u,													// deUint32			attachment;
292 		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL			// VkImageLayout	layout;
293 	};
294 
295 	const VkSubpassDescription subpassDescription =
296 	{
297 		(VkSubpassDescriptionFlags)0,						// VkSubpassDescriptionFlags		flags;
298 		VK_PIPELINE_BIND_POINT_GRAPHICS,					// VkPipelineBindPoint				pipelineBindPoint;
299 		0u,													// deUint32							inputAttachmentCount;
300 		DE_NULL,											// const VkAttachmentReference*		pInputAttachments;
301 		1u,													// deUint32							colorAttachmentCount;
302 		&colorAttachmentRef,								// const VkAttachmentReference*		pColorAttachments;
303 		DE_NULL,											// const VkAttachmentReference*		pResolveAttachments;
304 		DE_NULL,											// const VkAttachmentReference*		pDepthStencilAttachment;
305 		0u,													// deUint32							preserveAttachmentCount;
306 		DE_NULL												// const deUint32*					pPreserveAttachments;
307 	};
308 
309 	const VkRenderPassCreateInfo renderPassInfo =
310 	{
311 		VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,			// VkStructureType					sType;
312 		DE_NULL,											// const void*						pNext;
313 		(VkRenderPassCreateFlags)0,							// VkRenderPassCreateFlags			flags;
314 		1u,													// deUint32							attachmentCount;
315 		&colorAttachmentDescription,						// const VkAttachmentDescription*	pAttachments;
316 		1u,													// deUint32							subpassCount;
317 		&subpassDescription,								// const VkSubpassDescription*		pSubpasses;
318 		0u,													// deUint32							dependencyCount;
319 		DE_NULL												// const VkSubpassDependency*		pDependencies;
320 	};
321 
322 	return createRenderPass(vk, device, &renderPassInfo);
323 }
324 
makeGraphicsPipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const VkRenderPass renderPass,const IVec2 renderSize,const VkPrimitiveTopology topology,const deUint32 stageCount,const VkPipelineShaderStageCreateInfo * pStages)325 Move<VkPipeline> makeGraphicsPipeline (const DeviceInterface&					vk,
326 									   const VkDevice							device,
327 									   const VkPipelineLayout					pipelineLayout,
328 									   const VkRenderPass						renderPass,
329 									   const IVec2								renderSize,
330 									   const VkPrimitiveTopology				topology,
331 									   const deUint32							stageCount,
332 									   const VkPipelineShaderStageCreateInfo*	pStages)
333 {
334 	const VkVertexInputBindingDescription vertexInputBindingDescription =
335 	{
336 		0u,								// uint32_t				binding;
337 		sizeof(Vec4),					// uint32_t				stride;
338 		VK_VERTEX_INPUT_RATE_VERTEX,	// VkVertexInputRate	inputRate;
339 	};
340 
341 	const VkVertexInputAttributeDescription vertexInputAttributeDescription =
342 	{
343 		0u,									// uint32_t			location;
344 		0u,									// uint32_t			binding;
345 		VK_FORMAT_R32G32B32A32_SFLOAT,		// VkFormat			format;
346 		0u,									// uint32_t			offset;
347 	};
348 
349 	const VkPipelineVertexInputStateCreateInfo vertexInputStateInfo =
350 	{
351 		VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,		// VkStructureType                             sType;
352 		DE_NULL,														// const void*                                 pNext;
353 		(VkPipelineVertexInputStateCreateFlags)0,						// VkPipelineVertexInputStateCreateFlags       flags;
354 		1u,																// uint32_t                                    vertexBindingDescriptionCount;
355 		&vertexInputBindingDescription,									// const VkVertexInputBindingDescription*      pVertexBindingDescriptions;
356 		1u,																// uint32_t                                    vertexAttributeDescriptionCount;
357 		&vertexInputAttributeDescription,								// const VkVertexInputAttributeDescription*    pVertexAttributeDescriptions;
358 	};
359 
360 	const VkPipelineInputAssemblyStateCreateInfo pipelineInputAssemblyStateInfo =
361 	{
362 		VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,	// VkStructureType                             sType;
363 		DE_NULL,														// const void*                                 pNext;
364 		(VkPipelineInputAssemblyStateCreateFlags)0,						// VkPipelineInputAssemblyStateCreateFlags     flags;
365 		topology,														// VkPrimitiveTopology                         topology;
366 		VK_FALSE,														// VkBool32                                    primitiveRestartEnable;
367 	};
368 
369 	const VkViewport viewport = makeViewport(
370 		0.0f, 0.0f,
371 		static_cast<float>(renderSize.x()), static_cast<float>(renderSize.y()),
372 		0.0f, 1.0f);
373 
374 	const VkRect2D scissor = {
375 		makeOffset2D(0, 0),
376 		makeExtent2D(static_cast<deUint32>(renderSize.x()), static_cast<deUint32>(renderSize.y())),
377 	};
378 
379 	const VkPipelineViewportStateCreateInfo pipelineViewportStateInfo =
380 	{
381 		VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,			// VkStructureType                             sType;
382 		DE_NULL,														// const void*                                 pNext;
383 		(VkPipelineViewportStateCreateFlags)0,							// VkPipelineViewportStateCreateFlags          flags;
384 		1u,																// uint32_t                                    viewportCount;
385 		&viewport,														// const VkViewport*                           pViewports;
386 		1u,																// uint32_t                                    scissorCount;
387 		&scissor,														// const VkRect2D*                             pScissors;
388 	};
389 
390 	const VkPipelineRasterizationStateCreateInfo pipelineRasterizationStateInfo =
391 	{
392 		VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,		// VkStructureType                          sType;
393 		DE_NULL,														// const void*                              pNext;
394 		(VkPipelineRasterizationStateCreateFlags)0,						// VkPipelineRasterizationStateCreateFlags  flags;
395 		VK_FALSE,														// VkBool32                                 depthClampEnable;
396 		VK_FALSE,														// VkBool32                                 rasterizerDiscardEnable;
397 		VK_POLYGON_MODE_FILL,											// VkPolygonMode							polygonMode;
398 		VK_CULL_MODE_NONE,												// VkCullModeFlags							cullMode;
399 		VK_FRONT_FACE_COUNTER_CLOCKWISE,								// VkFrontFace								frontFace;
400 		VK_FALSE,														// VkBool32									depthBiasEnable;
401 		0.0f,															// float									depthBiasConstantFactor;
402 		0.0f,															// float									depthBiasClamp;
403 		0.0f,															// float									depthBiasSlopeFactor;
404 		1.0f,															// float									lineWidth;
405 	};
406 
407 	const VkPipelineMultisampleStateCreateInfo pipelineMultisampleStateInfo =
408 	{
409 		VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,		// VkStructureType							sType;
410 		DE_NULL,														// const void*								pNext;
411 		(VkPipelineMultisampleStateCreateFlags)0,						// VkPipelineMultisampleStateCreateFlags	flags;
412 		VK_SAMPLE_COUNT_1_BIT,											// VkSampleCountFlagBits					rasterizationSamples;
413 		VK_FALSE,														// VkBool32									sampleShadingEnable;
414 		0.0f,															// float									minSampleShading;
415 		DE_NULL,														// const VkSampleMask*						pSampleMask;
416 		VK_FALSE,														// VkBool32									alphaToCoverageEnable;
417 		VK_FALSE														// VkBool32									alphaToOneEnable;
418 	};
419 
420 	const VkStencilOpState stencilOpState = makeStencilOpState(
421 		VK_STENCIL_OP_KEEP,				// stencil fail
422 		VK_STENCIL_OP_KEEP,				// depth & stencil pass
423 		VK_STENCIL_OP_KEEP,				// depth only fail
424 		VK_COMPARE_OP_ALWAYS,			// compare op
425 		0u,								// compare mask
426 		0u,								// write mask
427 		0u);							// reference
428 
429 	VkPipelineDepthStencilStateCreateInfo pipelineDepthStencilStateInfo =
430 	{
431 		VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,		// VkStructureType							sType;
432 		DE_NULL,														// const void*								pNext;
433 		(VkPipelineDepthStencilStateCreateFlags)0,						// VkPipelineDepthStencilStateCreateFlags	flags;
434 		VK_FALSE,														// VkBool32									depthTestEnable;
435 		VK_FALSE,														// VkBool32									depthWriteEnable;
436 		VK_COMPARE_OP_LESS,												// VkCompareOp								depthCompareOp;
437 		VK_FALSE,														// VkBool32									depthBoundsTestEnable;
438 		VK_FALSE,														// VkBool32									stencilTestEnable;
439 		stencilOpState,													// VkStencilOpState							front;
440 		stencilOpState,													// VkStencilOpState							back;
441 		0.0f,															// float									minDepthBounds;
442 		1.0f,															// float									maxDepthBounds;
443 	};
444 
445 	const VkColorComponentFlags					colorComponentsAll					= VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
446 	const VkPipelineColorBlendAttachmentState	pipelineColorBlendAttachmentState	=
447 	{
448 		VK_FALSE,						// VkBool32					blendEnable;
449 		VK_BLEND_FACTOR_ONE,			// VkBlendFactor			srcColorBlendFactor;
450 		VK_BLEND_FACTOR_ZERO,			// VkBlendFactor			dstColorBlendFactor;
451 		VK_BLEND_OP_ADD,				// VkBlendOp				colorBlendOp;
452 		VK_BLEND_FACTOR_ONE,			// VkBlendFactor			srcAlphaBlendFactor;
453 		VK_BLEND_FACTOR_ZERO,			// VkBlendFactor			dstAlphaBlendFactor;
454 		VK_BLEND_OP_ADD,				// VkBlendOp				alphaBlendOp;
455 		colorComponentsAll,				// VkColorComponentFlags	colorWriteMask;
456 	};
457 
458 	const VkPipelineColorBlendStateCreateInfo pipelineColorBlendStateInfo =
459 	{
460 		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,		// VkStructureType								sType;
461 		DE_NULL,														// const void*									pNext;
462 		(VkPipelineColorBlendStateCreateFlags)0,						// VkPipelineColorBlendStateCreateFlags			flags;
463 		VK_FALSE,														// VkBool32										logicOpEnable;
464 		VK_LOGIC_OP_COPY,												// VkLogicOp									logicOp;
465 		1u,																// deUint32										attachmentCount;
466 		&pipelineColorBlendAttachmentState,								// const VkPipelineColorBlendAttachmentState*	pAttachments;
467 		{ 0.0f, 0.0f, 0.0f, 0.0f },										// float										blendConstants[4];
468 	};
469 
470 	const VkGraphicsPipelineCreateInfo graphicsPipelineInfo =
471 	{
472 		VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,	// VkStructureType									sType;
473 		DE_NULL,											// const void*										pNext;
474 		(VkPipelineCreateFlags)0,							// VkPipelineCreateFlags							flags;
475 		stageCount,											// deUint32											stageCount;
476 		pStages,											// const VkPipelineShaderStageCreateInfo*			pStages;
477 		&vertexInputStateInfo,								// const VkPipelineVertexInputStateCreateInfo*		pVertexInputState;
478 		&pipelineInputAssemblyStateInfo,					// const VkPipelineInputAssemblyStateCreateInfo*	pInputAssemblyState;
479 		DE_NULL,											// const VkPipelineTessellationStateCreateInfo*		pTessellationState;
480 		&pipelineViewportStateInfo,							// const VkPipelineViewportStateCreateInfo*			pViewportState;
481 		&pipelineRasterizationStateInfo,					// const VkPipelineRasterizationStateCreateInfo*	pRasterizationState;
482 		&pipelineMultisampleStateInfo,						// const VkPipelineMultisampleStateCreateInfo*		pMultisampleState;
483 		&pipelineDepthStencilStateInfo,						// const VkPipelineDepthStencilStateCreateInfo*		pDepthStencilState;
484 		&pipelineColorBlendStateInfo,						// const VkPipelineColorBlendStateCreateInfo*		pColorBlendState;
485 		DE_NULL,											// const VkPipelineDynamicStateCreateInfo*			pDynamicState;
486 		pipelineLayout,										// VkPipelineLayout									layout;
487 		renderPass,											// VkRenderPass										renderPass;
488 		0u,													// deUint32											subpass;
489 		DE_NULL,											// VkPipeline										basePipelineHandle;
490 		0,													// deInt32											basePipelineIndex;
491 	};
492 
493 	return createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineInfo);
494 }
495 
496 //! Return true if there are any red (or all zero) pixels in the image
imageHasErrorPixels(const tcu::ConstPixelBufferAccess image)497 bool imageHasErrorPixels (const tcu::ConstPixelBufferAccess image)
498 {
499 	const Vec4 errorColor	= Vec4(1.0f, 0.0f, 0.0f, 1.0f);
500 	const Vec4 blankColor	= Vec4();
501 
502 	for (int y = 0; y < image.getHeight(); ++y)
503 	for (int x = 0; x < image.getWidth(); ++x)
504 	{
505 		const Vec4 color = image.getPixel(x, y);
506 		if (color == errorColor || color == blankColor)
507 			return true;
508 	}
509 
510 	return false;
511 }
512 
513 class Renderer
514 {
515 public:
516 	typedef std::map<VkShaderStageFlagBits, const VkSpecializationInfo*>	SpecializationMap;
517 
518 	//! Use the delegate to bind descriptor sets, vertex buffers, etc. and make a draw call
519 	struct Delegate
520 	{
~Delegatevkt::sparse::__anon331f327e0111::Renderer::Delegate521 		virtual			~Delegate		(void) {}
522 		virtual void	rendererDraw	(const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const = 0;
523 	};
524 
Renderer(const DeviceInterface & vk,const VkDevice device,Allocator & allocator,const deUint32 queueFamilyIndex,const VkDescriptorSetLayout descriptorSetLayout,BinaryCollection & binaryCollection,const std::string & vertexName,const std::string & fragmentName,const VkBuffer colorBuffer,const IVec2 & renderSize,const VkFormat colorFormat,const Vec4 & clearColor,const VkPrimitiveTopology topology,SpecializationMap specMap=SpecializationMap ())525 	Renderer (const DeviceInterface&		vk,
526 			  const VkDevice				device,
527 			  Allocator&					allocator,
528 			  const deUint32				queueFamilyIndex,
529 			  const VkDescriptorSetLayout	descriptorSetLayout,	//!< may be NULL, if no descriptors are used
530 			  BinaryCollection&				binaryCollection,
531 			  const std::string&			vertexName,
532 			  const std::string&			fragmentName,
533 			  const VkBuffer				colorBuffer,
534 			  const IVec2&					renderSize,
535 			  const VkFormat				colorFormat,
536 			  const Vec4&					clearColor,
537 			  const VkPrimitiveTopology		topology,
538 			  SpecializationMap				specMap = SpecializationMap())
539 		: m_colorBuffer				(colorBuffer)
540 		, m_renderSize				(renderSize)
541 		, m_colorFormat				(colorFormat)
542 		, m_colorSubresourceRange	(makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u))
543 		, m_clearColor				(clearColor)
544 		, m_topology				(topology)
545 		, m_descriptorSetLayout		(descriptorSetLayout)
546 	{
547 		m_colorImage		= makeImage		(vk, device, makeImageCreateInfo(m_colorFormat, m_renderSize, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT));
548 		m_colorImageAlloc	= bindImage		(vk, device, allocator, *m_colorImage, MemoryRequirement::Any);
549 		m_colorAttachment	= makeImageView	(vk, device, *m_colorImage, VK_IMAGE_VIEW_TYPE_2D, m_colorFormat, m_colorSubresourceRange);
550 
551 		m_vertexModule		= createShaderModule	(vk, device, binaryCollection.get(vertexName), 0u);
552 		m_fragmentModule	= createShaderModule	(vk, device, binaryCollection.get(fragmentName), 0u);
553 
554 		const VkPipelineShaderStageCreateInfo pShaderStages[] =
555 		{
556 			{
557 				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,		// VkStructureType						sType;
558 				DE_NULL,													// const void*							pNext;
559 				(VkPipelineShaderStageCreateFlags)0,						// VkPipelineShaderStageCreateFlags		flags;
560 				VK_SHADER_STAGE_VERTEX_BIT,									// VkShaderStageFlagBits				stage;
561 				*m_vertexModule,											// VkShaderModule						module;
562 				"main",														// const char*							pName;
563 				specMap[VK_SHADER_STAGE_VERTEX_BIT],						// const VkSpecializationInfo*			pSpecializationInfo;
564 			},
565 			{
566 				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,		// VkStructureType						sType;
567 				DE_NULL,													// const void*							pNext;
568 				(VkPipelineShaderStageCreateFlags)0,						// VkPipelineShaderStageCreateFlags		flags;
569 				VK_SHADER_STAGE_FRAGMENT_BIT,								// VkShaderStageFlagBits				stage;
570 				*m_fragmentModule,											// VkShaderModule						module;
571 				"main",														// const char*							pName;
572 				specMap[VK_SHADER_STAGE_FRAGMENT_BIT],						// const VkSpecializationInfo*			pSpecializationInfo;
573 			}
574 		};
575 
576 		m_renderPass		= makeRenderPass		(vk, device, m_colorFormat);
577 		m_framebuffer		= makeFramebuffer		(vk, device, *m_renderPass, 1u, &m_colorAttachment.get(),
578 													 static_cast<deUint32>(m_renderSize.x()), static_cast<deUint32>(m_renderSize.y()));
579 		m_pipelineLayout	= makePipelineLayout	(vk, device, m_descriptorSetLayout);
580 		m_pipeline			= makeGraphicsPipeline	(vk, device, *m_pipelineLayout, *m_renderPass, m_renderSize, m_topology, DE_LENGTH_OF_ARRAY(pShaderStages), pShaderStages);
581 		m_cmdPool			= makeCommandPool		(vk, device, queueFamilyIndex);
582 		m_cmdBuffer			= allocateCommandBuffer	(vk, device, *m_cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
583 	}
584 
draw(const DeviceInterface & vk,const VkDevice device,const VkQueue queue,const Delegate & drawDelegate,const bool useDeviceGroups,const deUint32 deviceID) const585 	void draw (const DeviceInterface&	vk,
586 			   const VkDevice			device,
587 			   const VkQueue			queue,
588 			   const Delegate&			drawDelegate,
589 			   const bool				useDeviceGroups,
590 			   const deUint32			deviceID) const
591 	{
592 		beginCommandBuffer(vk, *m_cmdBuffer);
593 
594 		const VkClearValue			clearValue	= makeClearValueColor(m_clearColor);
595 		const VkRect2D				renderArea	=
596 		{
597 			makeOffset2D(0, 0),
598 			makeExtent2D(m_renderSize.x(), m_renderSize.y()),
599 		};
600 		const VkRenderPassBeginInfo renderPassBeginInfo =
601 		{
602 			VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,		// VkStructureType         sType;
603 			DE_NULL,										// const void*             pNext;
604 			*m_renderPass,									// VkRenderPass            renderPass;
605 			*m_framebuffer,									// VkFramebuffer           framebuffer;
606 			renderArea,										// VkRect2D                renderArea;
607 			1u,												// uint32_t                clearValueCount;
608 			&clearValue,									// const VkClearValue*     pClearValues;
609 		};
610 		vk.cmdBeginRenderPass(*m_cmdBuffer, &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE);
611 
612 		vk.cmdBindPipeline(*m_cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
613 		drawDelegate.rendererDraw(*m_pipelineLayout, *m_cmdBuffer);
614 
615 		vk.cmdEndRenderPass(*m_cmdBuffer);
616 
617 		// Prepare color image for copy
618 		{
619 			const VkImageMemoryBarrier barriers[] =
620 			{
621 				{
622 					VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,						// VkStructureType			sType;
623 					DE_NULL,													// const void*				pNext;
624 					VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,						// VkAccessFlags			outputMask;
625 					VK_ACCESS_TRANSFER_READ_BIT,								// VkAccessFlags			inputMask;
626 					VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,					// VkImageLayout			oldLayout;
627 					VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,						// VkImageLayout			newLayout;
628 					VK_QUEUE_FAMILY_IGNORED,									// deUint32					srcQueueFamilyIndex;
629 					VK_QUEUE_FAMILY_IGNORED,									// deUint32					destQueueFamilyIndex;
630 					*m_colorImage,												// VkImage					image;
631 					m_colorSubresourceRange,									// VkImageSubresourceRange	subresourceRange;
632 				},
633 			};
634 
635 			vk.cmdPipelineBarrier(*m_cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u,
636 				0u, DE_NULL, 0u, DE_NULL, DE_LENGTH_OF_ARRAY(barriers), barriers);
637 		}
638 		// Color image -> host buffer
639 		{
640 			const VkBufferImageCopy region =
641 			{
642 				0ull,																		// VkDeviceSize                bufferOffset;
643 				0u,																			// uint32_t                    bufferRowLength;
644 				0u,																			// uint32_t                    bufferImageHeight;
645 				makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u),			// VkImageSubresourceLayers    imageSubresource;
646 				makeOffset3D(0, 0, 0),														// VkOffset3D                  imageOffset;
647 				makeExtent3D(m_renderSize.x(), m_renderSize.y(), 1u),						// VkExtent3D                  imageExtent;
648 			};
649 
650 			vk.cmdCopyImageToBuffer(*m_cmdBuffer, *m_colorImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_colorBuffer, 1u, &region);
651 		}
652 		// Buffer write barrier
653 		{
654 			const VkBufferMemoryBarrier barriers[] =
655 			{
656 				{
657 					VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,		// VkStructureType    sType;
658 					DE_NULL,										// const void*        pNext;
659 					VK_ACCESS_TRANSFER_WRITE_BIT,					// VkAccessFlags      srcAccessMask;
660 					VK_ACCESS_HOST_READ_BIT,						// VkAccessFlags      dstAccessMask;
661 					VK_QUEUE_FAMILY_IGNORED,						// uint32_t           srcQueueFamilyIndex;
662 					VK_QUEUE_FAMILY_IGNORED,						// uint32_t           dstQueueFamilyIndex;
663 					m_colorBuffer,									// VkBuffer           buffer;
664 					0ull,											// VkDeviceSize       offset;
665 					VK_WHOLE_SIZE,									// VkDeviceSize       size;
666 				},
667 			};
668 
669 			vk.cmdPipelineBarrier(*m_cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u,
670 				0u, DE_NULL, DE_LENGTH_OF_ARRAY(barriers), barriers, DE_NULL, 0u);
671 		}
672 
673 		VK_CHECK(vk.endCommandBuffer(*m_cmdBuffer));
674 		submitCommandsAndWait(vk, device, queue, *m_cmdBuffer, 0U, DE_NULL, DE_NULL, 0U, DE_NULL, useDeviceGroups, deviceID);
675 	}
676 
677 private:
678 	const VkBuffer					m_colorBuffer;
679 	const IVec2						m_renderSize;
680 	const VkFormat					m_colorFormat;
681 	const VkImageSubresourceRange	m_colorSubresourceRange;
682 	const Vec4						m_clearColor;
683 	const VkPrimitiveTopology		m_topology;
684 	const VkDescriptorSetLayout		m_descriptorSetLayout;
685 
686 	Move<VkImage>					m_colorImage;
687 	MovePtr<Allocation>				m_colorImageAlloc;
688 	Move<VkImageView>				m_colorAttachment;
689 	Move<VkShaderModule>			m_vertexModule;
690 	Move<VkShaderModule>			m_fragmentModule;
691 	Move<VkRenderPass>				m_renderPass;
692 	Move<VkFramebuffer>				m_framebuffer;
693 	Move<VkPipelineLayout>			m_pipelineLayout;
694 	Move<VkPipeline>				m_pipeline;
695 	Move<VkCommandPool>				m_cmdPool;
696 	Move<VkCommandBuffer>			m_cmdBuffer;
697 
698 	// "deleted"
699 				Renderer	(const Renderer&);
700 	Renderer&	operator=	(const Renderer&);
701 };
702 
bindSparseBuffer(const DeviceInterface & vk,const VkDevice device,const VkQueue sparseQueue,const VkBuffer buffer,const SparseAllocation & sparseAllocation,const bool useDeviceGroups,deUint32 resourceDevId,deUint32 memoryDeviceId)703 void bindSparseBuffer (const DeviceInterface& vk, const VkDevice device, const VkQueue sparseQueue, const VkBuffer buffer, const SparseAllocation& sparseAllocation,
704 						const bool useDeviceGroups, deUint32 resourceDevId, deUint32 memoryDeviceId)
705 {
706 	const VkSparseBufferMemoryBindInfo sparseBufferMemoryBindInfo =
707 	{
708 		buffer,														// VkBuffer                     buffer;
709 		static_cast<deUint32>(sparseAllocation.memoryBinds.size()),	// uint32_t                     bindCount;
710 		&sparseAllocation.memoryBinds[0],							// const VkSparseMemoryBind*    pBinds;
711 	};
712 
713 	const VkDeviceGroupBindSparseInfo devGroupBindSparseInfo =
714 	{
715 		VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO_KHR,		//VkStructureType							sType;
716 		DE_NULL,													//const void*								pNext;
717 		resourceDevId,												//deUint32									resourceDeviceIndex;
718 		memoryDeviceId,												//deUint32									memoryDeviceIndex;
719 	};
720 
721 	const VkBindSparseInfo bindInfo =
722 	{
723 		VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,							// VkStructureType                             sType;
724 		useDeviceGroups ? &devGroupBindSparseInfo : DE_NULL,		// const void*                                 pNext;
725 		0u,															// uint32_t                                    waitSemaphoreCount;
726 		DE_NULL,													// const VkSemaphore*                          pWaitSemaphores;
727 		1u,															// uint32_t                                    bufferBindCount;
728 		&sparseBufferMemoryBindInfo,								// const VkSparseBufferMemoryBindInfo*         pBufferBinds;
729 		0u,															// uint32_t                                    imageOpaqueBindCount;
730 		DE_NULL,													// const VkSparseImageOpaqueMemoryBindInfo*    pImageOpaqueBinds;
731 		0u,															// uint32_t                                    imageBindCount;
732 		DE_NULL,													// const VkSparseImageMemoryBindInfo*          pImageBinds;
733 		0u,															// uint32_t                                    signalSemaphoreCount;
734 		DE_NULL,													// const VkSemaphore*                          pSignalSemaphores;
735 	};
736 
737 	const Unique<VkFence> fence(createFence(vk, device));
738 
739 	VK_CHECK(vk.queueBindSparse(sparseQueue, 1u, &bindInfo, *fence));
740 	VK_CHECK(vk.waitForFences(device, 1u, &fence.get(), VK_TRUE, ~0ull));
741 }
742 
743 class SparseBufferTestInstance : public SparseResourcesBaseInstance, Renderer::Delegate
744 {
745 public:
SparseBufferTestInstance(Context & context,const TestFlags flags)746 	SparseBufferTestInstance (Context& context, const TestFlags flags)
747 		: SparseResourcesBaseInstance	(context, (flags & TEST_FLAG_ENABLE_DEVICE_GROUPS) != 0)
748 		, m_aliased						((flags & TEST_FLAG_ALIASED)   != 0)
749 		, m_residency					((flags & TEST_FLAG_RESIDENCY) != 0)
750 		, m_nonResidentStrict			((flags & TEST_FLAG_NON_RESIDENT_STRICT) != 0)
751 		, m_renderSize					(RENDER_SIZE, RENDER_SIZE)
752 		, m_colorFormat					(VK_FORMAT_R8G8B8A8_UNORM)
753 		, m_colorBufferSize				(m_renderSize.x() * m_renderSize.y() * tcu::getPixelSize(mapVkFormat(m_colorFormat)))
754 	{
755 		{
756 			QueueRequirementsVec requirements;
757 			requirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
758 			requirements.push_back(QueueRequirements(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT, 1u));
759 
760 			createDeviceSupportingQueues(requirements);
761 		}
762 		const VkPhysicalDeviceFeatures	features	= getPhysicalDeviceFeatures(m_context.getInstanceInterface(), getPhysicalDevice());
763 
764 		if (!features.sparseBinding)
765 			TCU_THROW(NotSupportedError, "Missing feature: sparseBinding");
766 
767 		if (m_residency && !features.sparseResidencyBuffer)
768 			TCU_THROW(NotSupportedError, "Missing feature: sparseResidencyBuffer");
769 
770 		if (m_aliased && !features.sparseResidencyAliased)
771 			TCU_THROW(NotSupportedError, "Missing feature: sparseResidencyAliased");
772 
773 		if (m_nonResidentStrict && !m_context.getDeviceProperties().sparseProperties.residencyNonResidentStrict)
774 			TCU_THROW(NotSupportedError, "Missing sparse property: residencyNonResidentStrict");
775 
776 		const DeviceInterface& vk		= getDeviceInterface();
777 		m_sparseQueue					= getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0u);
778 		m_universalQueue				= getQueue(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT, 0u);
779 
780 		m_sharedQueueFamilyIndices[0]	= m_sparseQueue.queueFamilyIndex;
781 		m_sharedQueueFamilyIndices[1]	= m_universalQueue.queueFamilyIndex;
782 
783 		m_colorBuffer					= makeBuffer(vk, getDevice(), makeBufferCreateInfo(m_colorBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT));
784 		m_colorBufferAlloc				= bindBuffer(vk, getDevice(), getAllocator(), *m_colorBuffer, MemoryRequirement::HostVisible);
785 
786 		deMemset(m_colorBufferAlloc->getHostPtr(), 0, static_cast<std::size_t>(m_colorBufferSize));
787 		flushMappedMemoryRange(vk, getDevice(), m_colorBufferAlloc->getMemory(), m_colorBufferAlloc->getOffset(), m_colorBufferSize);
788 	}
789 
790 protected:
getSparseBufferCreateInfo(const VkBufferUsageFlags usage) const791 	VkBufferCreateInfo getSparseBufferCreateInfo (const VkBufferUsageFlags usage) const
792 	{
793 		VkBufferCreateFlags	flags = VK_BUFFER_CREATE_SPARSE_BINDING_BIT;
794 		if (m_residency)
795 			flags |= VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT;
796 		if (m_aliased)
797 			flags |= VK_BUFFER_CREATE_SPARSE_ALIASED_BIT;
798 
799 		VkBufferCreateInfo referenceBufferCreateInfo =
800 		{
801 			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,				// VkStructureType        sType;
802 			DE_NULL,											// const void*            pNext;
803 			flags,												// VkBufferCreateFlags    flags;
804 			0u,	// override later								// VkDeviceSize           size;
805 			VK_BUFFER_USAGE_TRANSFER_DST_BIT | usage,			// VkBufferUsageFlags     usage;
806 			VK_SHARING_MODE_EXCLUSIVE,							// VkSharingMode          sharingMode;
807 			0u,													// uint32_t               queueFamilyIndexCount;
808 			DE_NULL,											// const uint32_t*        pQueueFamilyIndices;
809 		};
810 
811 		if (m_sparseQueue.queueFamilyIndex != m_universalQueue.queueFamilyIndex)
812 		{
813 			referenceBufferCreateInfo.sharingMode			= VK_SHARING_MODE_CONCURRENT;
814 			referenceBufferCreateInfo.queueFamilyIndexCount	= DE_LENGTH_OF_ARRAY(m_sharedQueueFamilyIndices);
815 			referenceBufferCreateInfo.pQueueFamilyIndices	= m_sharedQueueFamilyIndices;
816 		}
817 
818 		return referenceBufferCreateInfo;
819 	}
820 
draw(const VkPrimitiveTopology topology,const VkDescriptorSetLayout descriptorSetLayout=DE_NULL,Renderer::SpecializationMap specMap=Renderer::SpecializationMap (),bool useDeviceGroups=false,deUint32 deviceID=0)821 	void draw (const VkPrimitiveTopology	topology,
822 			   const VkDescriptorSetLayout	descriptorSetLayout	= DE_NULL,
823 			   Renderer::SpecializationMap	specMap				= Renderer::SpecializationMap(),
824 			   bool							useDeviceGroups		= false,
825 			   deUint32						deviceID			= 0)
826 	{
827 		const UniquePtr<Renderer> renderer(new Renderer(
828 			getDeviceInterface(), getDevice(), getAllocator(), m_universalQueue.queueFamilyIndex, descriptorSetLayout,
829 			m_context.getBinaryCollection(), "vert", "frag", *m_colorBuffer, m_renderSize, m_colorFormat, Vec4(1.0f, 0.0f, 0.0f, 1.0f), topology, specMap));
830 
831 		renderer->draw(getDeviceInterface(), getDevice(), m_universalQueue.queueHandle, *this, useDeviceGroups, deviceID);
832 	}
833 
isResultImageCorrect(void) const834 	bool isResultImageCorrect (void) const
835 	{
836 		invalidateMappedMemoryRange(getDeviceInterface(), getDevice(), m_colorBufferAlloc->getMemory(), 0ull, m_colorBufferSize);
837 
838 		const tcu::ConstPixelBufferAccess resultImage (mapVkFormat(m_colorFormat), m_renderSize.x(), m_renderSize.y(), 1u, m_colorBufferAlloc->getHostPtr());
839 
840 		m_context.getTestContext().getLog()
841 			<< tcu::LogImageSet("Result", "Result") << tcu::LogImage("color0", "", resultImage) << tcu::TestLog::EndImageSet;
842 
843 		return !imageHasErrorPixels(resultImage);
844 	}
845 
846 	const bool							m_aliased;
847 	const bool							m_residency;
848 	const bool							m_nonResidentStrict;
849 
850 	Queue								m_sparseQueue;
851 	Queue								m_universalQueue;
852 
853 private:
854 	const IVec2							m_renderSize;
855 	const VkFormat						m_colorFormat;
856 	const VkDeviceSize					m_colorBufferSize;
857 
858 	Move<VkBuffer>						m_colorBuffer;
859 	MovePtr<Allocation>					m_colorBufferAlloc;
860 
861 	deUint32							m_sharedQueueFamilyIndices[2];
862 };
863 
initProgramsDrawWithUBO(vk::SourceCollections & programCollection,const TestFlags flags)864 void initProgramsDrawWithUBO (vk::SourceCollections& programCollection, const TestFlags flags)
865 {
866 	// Vertex shader
867 	{
868 		std::ostringstream src;
869 		src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
870 			<< "\n"
871 			<< "layout(location = 0) in vec4 in_position;\n"
872 			<< "\n"
873 			<< "out gl_PerVertex {\n"
874 			<< "    vec4 gl_Position;\n"
875 			<< "};\n"
876 			<< "\n"
877 			<< "void main(void)\n"
878 			<< "{\n"
879 			<< "    gl_Position = in_position;\n"
880 			<< "}\n";
881 
882 		programCollection.glslSources.add("vert") << glu::VertexSource(src.str());
883 	}
884 
885 	// Fragment shader
886 	{
887 		const bool			aliased				= (flags & TEST_FLAG_ALIASED) != 0;
888 		const bool			residency			= (flags & TEST_FLAG_RESIDENCY) != 0;
889 		const bool			nonResidentStrict	= (flags & TEST_FLAG_NON_RESIDENT_STRICT) != 0;
890 		const std::string	valueExpr			= (aliased ? "ivec4(3*(ndx % nonAliasedSize) ^ 127, 0, 0, 0)" : "ivec4(3*ndx ^ 127, 0, 0, 0)");
891 
892 		std::ostringstream src;
893 		src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
894 			<< "\n"
895 			<< "layout(location = 0) out vec4 o_color;\n"
896 			<< "\n"
897 			<< "layout(constant_id = 1) const int dataSize  = 1;\n"
898 			<< "layout(constant_id = 2) const int chunkSize = 1;\n"
899 			<< "\n"
900 			<< "layout(set = 0, binding = 0, std140) uniform SparseBuffer {\n"
901 			<< "    ivec4 data[dataSize];\n"
902 			<< "} ubo;\n"
903 			<< "\n"
904 			<< "void main(void)\n"
905 			<< "{\n"
906 			<< "    const int fragNdx        = int(gl_FragCoord.x) + " << RENDER_SIZE << " * int(gl_FragCoord.y);\n"
907 			<< "    const int pageSize       = " << RENDER_SIZE << " * " << RENDER_SIZE << ";\n"
908 			<< "    const int numChunks      = dataSize / chunkSize;\n";
909 
910 		if (aliased)
911 			src << "    const int nonAliasedSize = (numChunks > 1 ? dataSize - chunkSize : dataSize);\n";
912 
913 		src << "    bool      ok             = true;\n"
914 			<< "\n"
915 			<< "    for (int ndx = fragNdx; ndx < dataSize; ndx += pageSize)\n"
916 			<< "    {\n";
917 
918 		if (residency && nonResidentStrict)
919 		{
920 			src << "        if (ndx >= chunkSize && ndx < 2*chunkSize)\n"
921 				<< "            ok = ok && (ubo.data[ndx] == ivec4(0));\n"
922 				<< "        else\n"
923 				<< "            ok = ok && (ubo.data[ndx] == " + valueExpr + ");\n";
924 		}
925 		else if (residency)
926 		{
927 			src << "        if (ndx >= chunkSize && ndx < 2*chunkSize)\n"
928 				<< "            continue;\n"
929 				<< "        ok = ok && (ubo.data[ndx] == " << valueExpr << ");\n";
930 		}
931 		else
932 			src << "        ok = ok && (ubo.data[ndx] == " << valueExpr << ");\n";
933 
934 		src << "    }\n"
935 			<< "\n"
936 			<< "    if (ok)\n"
937 			<< "        o_color = vec4(0.0, 1.0, 0.0, 1.0);\n"
938 			<< "    else\n"
939 			<< "        o_color = vec4(1.0, 0.0, 0.0, 1.0);\n"
940 			<< "}\n";
941 
942 		programCollection.glslSources.add("frag") << glu::FragmentSource(src.str());
943 	}
944 }
945 
946 //! Sparse buffer backing a UBO
947 class UBOTestInstance : public SparseBufferTestInstance
948 {
949 public:
UBOTestInstance(Context & context,const TestFlags flags)950 	UBOTestInstance (Context& context, const TestFlags flags)
951 		: SparseBufferTestInstance	(context, flags)
952 	{
953 	}
954 
rendererDraw(const VkPipelineLayout pipelineLayout,const VkCommandBuffer cmdBuffer) const955 	void rendererDraw (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
956 	{
957 		const DeviceInterface&	vk				= getDeviceInterface();
958 		const VkDeviceSize		vertexOffset	= 0ull;
959 
960 		vk.cmdBindVertexBuffers	(cmdBuffer, 0u, 1u, &m_vertexBuffer.get(), &vertexOffset);
961 		vk.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
962 		vk.cmdDraw				(cmdBuffer, 4u, 1u, 0u, 0u);
963 	}
964 
iterate(void)965 	tcu::TestStatus iterate (void)
966 	{
967 		const DeviceInterface&		vk					= getDeviceInterface();
968 		MovePtr<SparseAllocation>	sparseAllocation;
969 		Move<VkBuffer>				sparseBuffer;
970 		Move<VkBuffer>				sparseBufferAliased;
971 		bool						setupDescriptors	= true;
972 
973 		// Go through all physical devices
974 		for (deUint32 physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
975 		{
976 			const deUint32	firstDeviceID	= physDevID;
977 			const deUint32	secondDeviceID	= (firstDeviceID + 1) % m_numPhysicalDevices;
978 
979 			// Set up the sparse buffer
980 			{
981 				VkBufferCreateInfo	referenceBufferCreateInfo	= getSparseBufferCreateInfo(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
982 				const VkDeviceSize	minChunkSize				= 512u;	// make sure the smallest allocation is at least this big
983 				deUint32			numMaxChunks				= 0u;
984 
985 				// Check how many chunks we can allocate given the alignment and size requirements of UBOs
986 				{
987 					const UniquePtr<SparseAllocation> minAllocation(SparseAllocationBuilder()
988 						.addMemoryBind()
989 						.build(vk, getDevice(), getAllocator(), referenceBufferCreateInfo, minChunkSize));
990 
991 					numMaxChunks = deMaxu32(static_cast<deUint32>(m_context.getDeviceProperties().limits.maxUniformBufferRange / minAllocation->resourceSize), 1u);
992 				}
993 
994 				if (numMaxChunks < 4)
995 				{
996 					sparseAllocation = SparseAllocationBuilder()
997 						.addMemoryBind()
998 						.build(vk, getDevice(), getAllocator(), referenceBufferCreateInfo, minChunkSize);
999 				}
1000 				else
1001 				{
1002 					// Try to use a non-trivial memory allocation scheme to make it different from a non-sparse binding
1003 					SparseAllocationBuilder builder;
1004 					builder.addMemoryBind();
1005 
1006 					if (m_residency)
1007 						builder.addResourceHole();
1008 
1009 					builder
1010 						.addMemoryAllocation()
1011 						.addMemoryHole()
1012 						.addMemoryBind();
1013 
1014 					if (m_aliased)
1015 						builder.addAliasedMemoryBind(0u, 0u);
1016 
1017 					sparseAllocation = builder.build(vk, getDevice(), getAllocator(), referenceBufferCreateInfo, minChunkSize);
1018 					DE_ASSERT(sparseAllocation->resourceSize <= m_context.getDeviceProperties().limits.maxUniformBufferRange);
1019 				}
1020 
1021 				// Create the buffer
1022 				referenceBufferCreateInfo.size	= sparseAllocation->resourceSize;
1023 				sparseBuffer					= makeBuffer(vk, getDevice(), referenceBufferCreateInfo);
1024 				bindSparseBuffer(vk, getDevice(), m_sparseQueue.queueHandle, *sparseBuffer, *sparseAllocation, usingDeviceGroups(), firstDeviceID, secondDeviceID);
1025 
1026 				if (m_aliased)
1027 				{
1028 					sparseBufferAliased = makeBuffer(vk, getDevice(), referenceBufferCreateInfo);
1029 					bindSparseBuffer(vk, getDevice(), m_sparseQueue.queueHandle, *sparseBufferAliased, *sparseAllocation, usingDeviceGroups(), firstDeviceID, secondDeviceID);
1030 				}
1031 			}
1032 
1033 			// Set uniform data
1034 			{
1035 				const bool					hasAliasedChunk		= (m_aliased && sparseAllocation->memoryBinds.size() > 1u);
1036 				const VkDeviceSize			chunkSize			= sparseAllocation->resourceSize / sparseAllocation->numResourceChunks;
1037 				const VkDeviceSize			stagingBufferSize	= sparseAllocation->resourceSize - (hasAliasedChunk ? chunkSize : 0);
1038 				const deUint32				numBufferEntries	= static_cast<deUint32>(stagingBufferSize / sizeof(IVec4));
1039 
1040 				const Unique<VkBuffer>		stagingBuffer		(makeBuffer(vk, getDevice(), makeBufferCreateInfo(stagingBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT)));
1041 				const UniquePtr<Allocation>	stagingBufferAlloc	(bindBuffer(vk, getDevice(), getAllocator(), *stagingBuffer, MemoryRequirement::HostVisible));
1042 
1043 				{
1044 					// If aliased chunk is used, the staging buffer is smaller than the sparse buffer and we don't overwrite the last chunk
1045 					IVec4* const pData = static_cast<IVec4*>(stagingBufferAlloc->getHostPtr());
1046 					for (deUint32 i = 0; i < numBufferEntries; ++i)
1047 						pData[i] = IVec4(3*i ^ 127, 0, 0, 0);
1048 
1049 					flushMappedMemoryRange(vk, getDevice(), stagingBufferAlloc->getMemory(), stagingBufferAlloc->getOffset(), stagingBufferSize);
1050 
1051 					const VkBufferCopy copyRegion =
1052 					{
1053 						0ull,						// VkDeviceSize    srcOffset;
1054 						0ull,						// VkDeviceSize    dstOffset;
1055 						stagingBufferSize,			// VkDeviceSize    size;
1056 					};
1057 
1058 					const Unique<VkCommandPool>		cmdPool		(makeCommandPool(vk, getDevice(), m_universalQueue.queueFamilyIndex));
1059 					const Unique<VkCommandBuffer>	cmdBuffer	(allocateCommandBuffer(vk, getDevice(), *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1060 
1061 					beginCommandBuffer	(vk, *cmdBuffer);
1062 					vk.cmdCopyBuffer	(*cmdBuffer, *stagingBuffer, *sparseBuffer, 1u, &copyRegion);
1063 					endCommandBuffer	(vk, *cmdBuffer);
1064 
1065 					submitCommandsAndWait(vk, getDevice(), m_universalQueue.queueHandle, *cmdBuffer, 0u, DE_NULL, DE_NULL, 0, DE_NULL, usingDeviceGroups(), firstDeviceID);
1066 					// Once the fence is signaled, the write is also available to the aliasing buffer.
1067 				}
1068 			}
1069 
1070 			// Make sure that we don't try to access a larger range than is allowed. This only applies to a single chunk case.
1071 			const deUint32 maxBufferRange = deMinu32(static_cast<deUint32>(sparseAllocation->resourceSize), m_context.getDeviceProperties().limits.maxUniformBufferRange);
1072 
1073 			// Descriptor sets
1074 			{
1075 				// Setup only once
1076 				if (setupDescriptors)
1077 				{
1078 					m_descriptorSetLayout = DescriptorSetLayoutBuilder()
1079 						.addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_FRAGMENT_BIT)
1080 						.build(vk, getDevice());
1081 
1082 					m_descriptorPool = DescriptorPoolBuilder()
1083 						.addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
1084 						.build(vk, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1085 
1086 					m_descriptorSet = makeDescriptorSet(vk, getDevice(), *m_descriptorPool, *m_descriptorSetLayout);
1087 					setupDescriptors = false;
1088 				}
1089 
1090 				const VkBuffer					buffer				= (m_aliased ? *sparseBufferAliased : *sparseBuffer);
1091 				const VkDescriptorBufferInfo	sparseBufferInfo	= makeDescriptorBufferInfo(buffer, 0ull, maxBufferRange);
1092 
1093 				DescriptorSetUpdateBuilder()
1094 					.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &sparseBufferInfo)
1095 					.update(vk, getDevice());
1096 			}
1097 
1098 			// Vertex data
1099 			{
1100 				const Vec4 vertexData[] =
1101 				{
1102 					Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
1103 					Vec4(-1.0f,  1.0f, 0.0f, 1.0f),
1104 					Vec4( 1.0f, -1.0f, 0.0f, 1.0f),
1105 					Vec4( 1.0f,  1.0f, 0.0f, 1.0f),
1106 				};
1107 
1108 				const VkDeviceSize	vertexBufferSize	= sizeof(vertexData);
1109 
1110 				m_vertexBuffer		= makeBuffer(vk, getDevice(), makeBufferCreateInfo(vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT));
1111 				m_vertexBufferAlloc	= bindBuffer(vk, getDevice(), getAllocator(), *m_vertexBuffer, MemoryRequirement::HostVisible);
1112 
1113 				deMemcpy(m_vertexBufferAlloc->getHostPtr(), &vertexData[0], vertexBufferSize);
1114 				flushMappedMemoryRange(vk, getDevice(), m_vertexBufferAlloc->getMemory(), m_vertexBufferAlloc->getOffset(), vertexBufferSize);
1115 			}
1116 
1117 			// Draw
1118 			{
1119 				std::vector<deInt32> specializationData;
1120 				{
1121 					const deUint32	numBufferEntries	= maxBufferRange / static_cast<deUint32>(sizeof(IVec4));
1122 					const deUint32	numEntriesPerChunk	= numBufferEntries / sparseAllocation->numResourceChunks;
1123 
1124 					specializationData.push_back(numBufferEntries);
1125 					specializationData.push_back(numEntriesPerChunk);
1126 				}
1127 
1128 				const VkSpecializationMapEntry	specMapEntries[] =
1129 				{
1130 					{
1131 						1u,					// uint32_t    constantID;
1132 						0u,					// uint32_t    offset;
1133 						sizeof(deInt32),	// size_t      size;
1134 					},
1135 					{
1136 						2u,					// uint32_t    constantID;
1137 						sizeof(deInt32),	// uint32_t    offset;
1138 						sizeof(deInt32),	// size_t      size;
1139 					},
1140 				};
1141 
1142 				const VkSpecializationInfo specInfo =
1143 				{
1144 					DE_LENGTH_OF_ARRAY(specMapEntries),		// uint32_t                           mapEntryCount;
1145 					specMapEntries,							// const VkSpecializationMapEntry*    pMapEntries;
1146 					sizeInBytes(specializationData),		// size_t                             dataSize;
1147 					getDataOrNullptr(specializationData),	// const void*                        pData;
1148 				};
1149 
1150 				Renderer::SpecializationMap	specMap;
1151 				specMap[VK_SHADER_STAGE_FRAGMENT_BIT] = &specInfo;
1152 
1153 				draw(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, *m_descriptorSetLayout, specMap, usingDeviceGroups(), firstDeviceID);
1154 			}
1155 
1156 			if(!isResultImageCorrect())
1157 				return tcu::TestStatus::fail("Some buffer values were incorrect");
1158 		}
1159 		return tcu::TestStatus::pass("Pass");
1160 	}
1161 
1162 private:
1163 	Move<VkBuffer>					m_vertexBuffer;
1164 	MovePtr<Allocation>				m_vertexBufferAlloc;
1165 
1166 	Move<VkDescriptorSetLayout>		m_descriptorSetLayout;
1167 	Move<VkDescriptorPool>			m_descriptorPool;
1168 	Move<VkDescriptorSet>			m_descriptorSet;
1169 };
1170 
initProgramsDrawGrid(vk::SourceCollections & programCollection,const TestFlags flags)1171 void initProgramsDrawGrid (vk::SourceCollections& programCollection, const TestFlags flags)
1172 {
1173 	DE_UNREF(flags);
1174 
1175 	// Vertex shader
1176 	{
1177 		std::ostringstream src;
1178 		src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1179 			<< "\n"
1180 			<< "layout(location = 0) in  vec4 in_position;\n"
1181 			<< "layout(location = 0) out int  out_ndx;\n"
1182 			<< "\n"
1183 			<< "out gl_PerVertex {\n"
1184 			<< "    vec4 gl_Position;\n"
1185 			<< "};\n"
1186 			<< "\n"
1187 			<< "void main(void)\n"
1188 			<< "{\n"
1189 			<< "    gl_Position = in_position;\n"
1190 			<< "    out_ndx     = gl_VertexIndex;\n"
1191 			<< "}\n";
1192 
1193 		programCollection.glslSources.add("vert") << glu::VertexSource(src.str());
1194 	}
1195 
1196 	// Fragment shader
1197 	{
1198 		std::ostringstream src;
1199 		src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1200 			<< "\n"
1201 			<< "layout(location = 0) flat in  int  in_ndx;\n"
1202 			<< "layout(location = 0)      out vec4 o_color;\n"
1203 			<< "\n"
1204 			<< "void main(void)\n"
1205 			<< "{\n"
1206 			<< "    if (in_ndx % 2 == 0)\n"
1207 			<< "        o_color = vec4(vec3(1.0), 1.0);\n"
1208 			<< "    else\n"
1209 			<< "        o_color = vec4(vec3(0.75), 1.0);\n"
1210 			<< "}\n";
1211 
1212 		programCollection.glslSources.add("frag") << glu::FragmentSource(src.str());
1213 	}
1214 }
1215 
1216 //! Generate vertex positions for a grid of tiles composed of two triangles each (6 vertices)
generateGrid(void * pRawData,const float step,const float ox,const float oy,const deUint32 numX,const deUint32 numY,const float z=0.0f)1217 void generateGrid (void* pRawData, const float step, const float ox, const float oy, const deUint32 numX, const deUint32 numY, const float z = 0.0f)
1218 {
1219 	typedef Vec4 (*TilePtr)[6];
1220 
1221 	TilePtr const pData = static_cast<TilePtr>(pRawData);
1222 	{
1223 		for (deUint32 iy = 0; iy < numY; ++iy)
1224 		for (deUint32 ix = 0; ix < numX; ++ix)
1225 		{
1226 			const deUint32	ndx	= ix + numX * iy;
1227 			const float		x	= ox + step * static_cast<float>(ix);
1228 			const float		y	= oy + step * static_cast<float>(iy);
1229 
1230 			pData[ndx][0] = Vec4(x + step,	y,			z, 1.0f);
1231 			pData[ndx][1] = Vec4(x,			y,			z, 1.0f);
1232 			pData[ndx][2] = Vec4(x,			y + step,	z, 1.0f);
1233 
1234 			pData[ndx][3] = Vec4(x,			y + step,	z, 1.0f);
1235 			pData[ndx][4] = Vec4(x + step,	y + step,	z, 1.0f);
1236 			pData[ndx][5] = Vec4(x + step,	y,			z, 1.0f);
1237 		}
1238 	}
1239 }
1240 
1241 //! Base test for a sparse buffer backing a vertex/index buffer
1242 class DrawGridTestInstance : public SparseBufferTestInstance
1243 {
1244 public:
DrawGridTestInstance(Context & context,const TestFlags flags,const VkBufferUsageFlags usage,const VkDeviceSize minChunkSize)1245 	DrawGridTestInstance (Context& context, const TestFlags flags, const VkBufferUsageFlags usage, const VkDeviceSize minChunkSize)
1246 		: SparseBufferTestInstance	(context, flags)
1247 	{
1248 		const DeviceInterface&	vk							= getDeviceInterface();
1249 		VkBufferCreateInfo		referenceBufferCreateInfo	= getSparseBufferCreateInfo(usage);
1250 
1251 		{
1252 			// Allocate two chunks, each covering half of the viewport
1253 			SparseAllocationBuilder builder;
1254 			builder.addMemoryBind();
1255 
1256 			if (m_residency)
1257 				builder.addResourceHole();
1258 
1259 			builder
1260 				.addMemoryAllocation()
1261 				.addMemoryHole()
1262 				.addMemoryBind();
1263 
1264 			if (m_aliased)
1265 				builder.addAliasedMemoryBind(0u, 0u);
1266 
1267 			m_sparseAllocation	= builder.build(vk, getDevice(), getAllocator(), referenceBufferCreateInfo, minChunkSize);
1268 		}
1269 
1270 		// Create the buffer
1271 		referenceBufferCreateInfo.size	= m_sparseAllocation->resourceSize;
1272 		m_sparseBuffer					= makeBuffer(vk, getDevice(), referenceBufferCreateInfo);
1273 
1274 
1275 		m_perDrawBufferOffset	= m_sparseAllocation->resourceSize / m_sparseAllocation->numResourceChunks;
1276 		m_stagingBufferSize		= 2 * m_perDrawBufferOffset;
1277 		m_stagingBuffer			= makeBuffer(vk, getDevice(), makeBufferCreateInfo(m_stagingBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT));
1278 		m_stagingBufferAlloc	= bindBuffer(vk, getDevice(), getAllocator(), *m_stagingBuffer, MemoryRequirement::HostVisible);
1279 
1280 
1281 	}
1282 
iterate(void)1283 	tcu::TestStatus iterate (void)
1284 	{
1285 		const DeviceInterface&	vk	= getDeviceInterface();
1286 
1287 		for (deUint32 physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
1288 		{
1289 			const deUint32	firstDeviceID	= physDevID;
1290 			const deUint32	secondDeviceID	= (firstDeviceID + 1) % m_numPhysicalDevices;
1291 
1292 			// Bind the memory
1293 			bindSparseBuffer(vk, getDevice(), m_sparseQueue.queueHandle, *m_sparseBuffer, *m_sparseAllocation, usingDeviceGroups(), firstDeviceID, secondDeviceID);
1294 
1295 			initializeBuffers();
1296 
1297 			// Upload to the sparse buffer
1298 			{
1299 				flushMappedMemoryRange(vk, getDevice(), m_stagingBufferAlloc->getMemory(), m_stagingBufferAlloc->getOffset(), m_stagingBufferSize);
1300 
1301 				VkDeviceSize	firstChunkOffset	= 0ull;
1302 				VkDeviceSize	secondChunkOffset	= m_perDrawBufferOffset;
1303 
1304 				if (m_residency)
1305 					secondChunkOffset += m_perDrawBufferOffset;
1306 
1307 				if (m_aliased)
1308 					firstChunkOffset = secondChunkOffset + m_perDrawBufferOffset;
1309 
1310 				const VkBufferCopy copyRegions[] =
1311 				{
1312 					{
1313 						0ull,						// VkDeviceSize    srcOffset;
1314 						firstChunkOffset,			// VkDeviceSize    dstOffset;
1315 						m_perDrawBufferOffset,		// VkDeviceSize    size;
1316 					},
1317 					{
1318 						m_perDrawBufferOffset,		// VkDeviceSize    srcOffset;
1319 						secondChunkOffset,			// VkDeviceSize    dstOffset;
1320 						m_perDrawBufferOffset,		// VkDeviceSize    size;
1321 					},
1322 				};
1323 
1324 				const Unique<VkCommandPool>		cmdPool		(makeCommandPool(vk, getDevice(), m_universalQueue.queueFamilyIndex));
1325 				const Unique<VkCommandBuffer>	cmdBuffer	(allocateCommandBuffer(vk, getDevice(), *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1326 
1327 				beginCommandBuffer	(vk, *cmdBuffer);
1328 				vk.cmdCopyBuffer	(*cmdBuffer, *m_stagingBuffer, *m_sparseBuffer, DE_LENGTH_OF_ARRAY(copyRegions), copyRegions);
1329 				endCommandBuffer	(vk, *cmdBuffer);
1330 
1331 				submitCommandsAndWait(vk, getDevice(), m_universalQueue.queueHandle, *cmdBuffer, 0u, DE_NULL, DE_NULL, 0, DE_NULL, usingDeviceGroups(), firstDeviceID);
1332 			}
1333 
1334 
1335 			Renderer::SpecializationMap	specMap;
1336 			draw(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, DE_NULL, specMap, usingDeviceGroups(), firstDeviceID);
1337 
1338 			if(!isResultImageCorrect())
1339 				return tcu::TestStatus::fail("Some buffer values were incorrect");
1340 		}
1341 		return tcu::TestStatus::pass("Pass");
1342 	}
1343 
1344 protected:
1345 	virtual void				initializeBuffers		(void) = 0;
1346 
1347 	VkDeviceSize				m_perDrawBufferOffset;
1348 
1349 	VkDeviceSize				m_stagingBufferSize;
1350 	Move<VkBuffer>				m_stagingBuffer;
1351 	MovePtr<Allocation>			m_stagingBufferAlloc;
1352 
1353 	MovePtr<SparseAllocation>	m_sparseAllocation;
1354 	Move<VkBuffer>				m_sparseBuffer;
1355 };
1356 
1357 //! Sparse buffer backing a vertex input buffer
1358 class VertexBufferTestInstance : public DrawGridTestInstance
1359 {
1360 public:
VertexBufferTestInstance(Context & context,const TestFlags flags)1361 	VertexBufferTestInstance (Context& context, const TestFlags flags)
1362 		: DrawGridTestInstance	(context,
1363 								 flags,
1364 								 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
1365 								 GRID_SIZE * GRID_SIZE * 6 * sizeof(Vec4))
1366 	{
1367 	}
1368 
rendererDraw(const VkPipelineLayout pipelineLayout,const VkCommandBuffer cmdBuffer) const1369 	void rendererDraw (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
1370 	{
1371 		DE_UNREF(pipelineLayout);
1372 
1373 		m_context.getTestContext().getLog()
1374 			<< tcu::TestLog::Message << "Drawing a grid of triangles backed by a sparse vertex buffer. There should be no red pixels visible." << tcu::TestLog::EndMessage;
1375 
1376 		const DeviceInterface&	vk				= getDeviceInterface();
1377 		const deUint32			vertexCount		= 6 * (GRID_SIZE * GRID_SIZE) / 2;
1378 		VkDeviceSize			vertexOffset	= 0ull;
1379 
1380 		vk.cmdBindVertexBuffers	(cmdBuffer, 0u, 1u, &m_sparseBuffer.get(), &vertexOffset);
1381 		vk.cmdDraw				(cmdBuffer, vertexCount, 1u, 0u, 0u);
1382 
1383 		vertexOffset += m_perDrawBufferOffset * (m_residency ? 2 : 1);
1384 
1385 		vk.cmdBindVertexBuffers	(cmdBuffer, 0u, 1u, &m_sparseBuffer.get(), &vertexOffset);
1386 		vk.cmdDraw				(cmdBuffer, vertexCount, 1u, 0u, 0u);
1387 	}
1388 
initializeBuffers(void)1389 	void initializeBuffers (void)
1390 	{
1391 		deUint8*	pData	= static_cast<deUint8*>(m_stagingBufferAlloc->getHostPtr());
1392 		const float	step	= 2.0f / static_cast<float>(GRID_SIZE);
1393 
1394 		// Prepare data for two draw calls
1395 		generateGrid(pData,							step, -1.0f, -1.0f, GRID_SIZE, GRID_SIZE/2);
1396 		generateGrid(pData + m_perDrawBufferOffset,	step, -1.0f,  0.0f, GRID_SIZE, GRID_SIZE/2);
1397 	}
1398 };
1399 
1400 //! Sparse buffer backing an index buffer
1401 class IndexBufferTestInstance : public DrawGridTestInstance
1402 {
1403 public:
IndexBufferTestInstance(Context & context,const TestFlags flags)1404 	IndexBufferTestInstance (Context& context, const TestFlags flags)
1405 		: DrawGridTestInstance	(context,
1406 								 flags,
1407 								 VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
1408 								 GRID_SIZE * GRID_SIZE * 6 * sizeof(deUint32))
1409 		, m_halfVertexCount		(6 * (GRID_SIZE * GRID_SIZE) / 2)
1410 	{
1411 	}
1412 
rendererDraw(const VkPipelineLayout pipelineLayout,const VkCommandBuffer cmdBuffer) const1413 	void rendererDraw (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
1414 	{
1415 		DE_UNREF(pipelineLayout);
1416 
1417 		m_context.getTestContext().getLog()
1418 			<< tcu::TestLog::Message << "Drawing a grid of triangles from a sparse index buffer. There should be no red pixels visible." << tcu::TestLog::EndMessage;
1419 
1420 		const DeviceInterface&	vk				= getDeviceInterface();
1421 		const VkDeviceSize		vertexOffset	= 0ull;
1422 		VkDeviceSize			indexOffset		= 0ull;
1423 
1424 		vk.cmdBindVertexBuffers	(cmdBuffer, 0u, 1u, &m_vertexBuffer.get(), &vertexOffset);
1425 
1426 		vk.cmdBindIndexBuffer	(cmdBuffer, *m_sparseBuffer, indexOffset, VK_INDEX_TYPE_UINT32);
1427 		vk.cmdDrawIndexed		(cmdBuffer, m_halfVertexCount, 1u, 0u, 0, 0u);
1428 
1429 		indexOffset += m_perDrawBufferOffset * (m_residency ? 2 : 1);
1430 
1431 		vk.cmdBindIndexBuffer	(cmdBuffer, *m_sparseBuffer, indexOffset, VK_INDEX_TYPE_UINT32);
1432 		vk.cmdDrawIndexed		(cmdBuffer, m_halfVertexCount, 1u, 0u, 0, 0u);
1433 	}
1434 
initializeBuffers(void)1435 	void initializeBuffers (void)
1436 	{
1437 		// Vertex buffer
1438 		const DeviceInterface&	vk					= getDeviceInterface();
1439 		const VkDeviceSize		vertexBufferSize	= 2 * m_halfVertexCount * sizeof(Vec4);
1440 								m_vertexBuffer		= makeBuffer(vk, getDevice(), makeBufferCreateInfo(vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT));
1441 								m_vertexBufferAlloc	= bindBuffer(vk, getDevice(), getAllocator(), *m_vertexBuffer, MemoryRequirement::HostVisible);
1442 
1443 		{
1444 			const float	step = 2.0f / static_cast<float>(GRID_SIZE);
1445 
1446 			generateGrid(m_vertexBufferAlloc->getHostPtr(), step, -1.0f, -1.0f, GRID_SIZE, GRID_SIZE);
1447 
1448 			flushMappedMemoryRange(vk, getDevice(), m_vertexBufferAlloc->getMemory(), m_vertexBufferAlloc->getOffset(), vertexBufferSize);
1449 		}
1450 
1451 		// Sparse index buffer
1452 		for (deUint32 chunkNdx = 0u; chunkNdx < 2; ++chunkNdx)
1453 		{
1454 			deUint8* const	pData		= static_cast<deUint8*>(m_stagingBufferAlloc->getHostPtr()) + chunkNdx * m_perDrawBufferOffset;
1455 			deUint32* const	pIndexData	= reinterpret_cast<deUint32*>(pData);
1456 			const deUint32	ndxBase		= chunkNdx * m_halfVertexCount;
1457 
1458 			for (deUint32 i = 0u; i < m_halfVertexCount; ++i)
1459 				pIndexData[i] = ndxBase + i;
1460 		}
1461 	}
1462 
1463 private:
1464 	const deUint32			m_halfVertexCount;
1465 	Move<VkBuffer>			m_vertexBuffer;
1466 	MovePtr<Allocation>		m_vertexBufferAlloc;
1467 };
1468 
1469 //! Draw from a sparse indirect buffer
1470 class IndirectBufferTestInstance : public DrawGridTestInstance
1471 {
1472 public:
IndirectBufferTestInstance(Context & context,const TestFlags flags)1473 	IndirectBufferTestInstance (Context& context, const TestFlags flags)
1474 		: DrawGridTestInstance	(context,
1475 								 flags,
1476 								 VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT,
1477 								 sizeof(VkDrawIndirectCommand))
1478 	{
1479 	}
1480 
rendererDraw(const VkPipelineLayout pipelineLayout,const VkCommandBuffer cmdBuffer) const1481 	void rendererDraw (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
1482 	{
1483 		DE_UNREF(pipelineLayout);
1484 
1485 		m_context.getTestContext().getLog()
1486 			<< tcu::TestLog::Message << "Drawing two triangles covering the whole viewport. There should be no red pixels visible." << tcu::TestLog::EndMessage;
1487 
1488 		const DeviceInterface&	vk				= getDeviceInterface();
1489 		const VkDeviceSize		vertexOffset	= 0ull;
1490 		VkDeviceSize			indirectOffset	= 0ull;
1491 
1492 		vk.cmdBindVertexBuffers	(cmdBuffer, 0u, 1u, &m_vertexBuffer.get(), &vertexOffset);
1493 		vk.cmdDrawIndirect		(cmdBuffer, *m_sparseBuffer, indirectOffset, 1u, 0u);
1494 
1495 		indirectOffset += m_perDrawBufferOffset * (m_residency ? 2 : 1);
1496 
1497 		vk.cmdDrawIndirect		(cmdBuffer, *m_sparseBuffer, indirectOffset, 1u, 0u);
1498 	}
1499 
initializeBuffers(void)1500 	void initializeBuffers (void)
1501 	{
1502 		// Vertex buffer
1503 		const DeviceInterface&	vk					= getDeviceInterface();
1504 		const VkDeviceSize		vertexBufferSize	= 2 * 3 * sizeof(Vec4);
1505 								m_vertexBuffer		= makeBuffer(vk, getDevice(), makeBufferCreateInfo(vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT));
1506 								m_vertexBufferAlloc	= bindBuffer(vk, getDevice(), getAllocator(), *m_vertexBuffer, MemoryRequirement::HostVisible);
1507 
1508 		{
1509 			generateGrid(m_vertexBufferAlloc->getHostPtr(), 2.0f, -1.0f, -1.0f, 1, 1);
1510 			flushMappedMemoryRange(vk, getDevice(), m_vertexBufferAlloc->getMemory(), m_vertexBufferAlloc->getOffset(), vertexBufferSize);
1511 		}
1512 
1513 		// Indirect buffer
1514 		for (deUint32 chunkNdx = 0u; chunkNdx < 2; ++chunkNdx)
1515 		{
1516 			deUint8* const					pData		= static_cast<deUint8*>(m_stagingBufferAlloc->getHostPtr()) + chunkNdx * m_perDrawBufferOffset;
1517 			VkDrawIndirectCommand* const	pCmdData	= reinterpret_cast<VkDrawIndirectCommand*>(pData);
1518 
1519 			pCmdData->firstVertex	= 3u * chunkNdx;
1520 			pCmdData->firstInstance	= 0u;
1521 			pCmdData->vertexCount	= 3u;
1522 			pCmdData->instanceCount	= 1u;
1523 		}
1524 	}
1525 
1526 private:
1527 	Move<VkBuffer>			m_vertexBuffer;
1528 	MovePtr<Allocation>		m_vertexBufferAlloc;
1529 };
1530 
1531 //! Similar to the class in vktTestCaseUtil.hpp, but uses Arg0 directly rather than through a InstanceFunction1
1532 template<typename Arg0>
1533 class FunctionProgramsSimple1
1534 {
1535 public:
1536 	typedef void	(*Function)				(vk::SourceCollections& dst, Arg0 arg0);
FunctionProgramsSimple1(Function func)1537 					FunctionProgramsSimple1	(Function func) : m_func(func)							{}
init(vk::SourceCollections & dst,const Arg0 & arg0) const1538 	void			init					(vk::SourceCollections& dst, const Arg0& arg0) const	{ m_func(dst, arg0); }
1539 
1540 private:
1541 	const Function	m_func;
1542 };
1543 
1544 //! Convenience function to create a TestCase based on a freestanding initPrograms and a TestInstance implementation
1545 template<typename TestInstanceT, typename Arg0>
createTestInstanceWithPrograms(tcu::TestContext & testCtx,const std::string & name,const std::string & desc,typename FunctionProgramsSimple1<Arg0>::Function initPrograms,Arg0 arg0)1546 TestCase* createTestInstanceWithPrograms (tcu::TestContext&									testCtx,
1547 										  const std::string&								name,
1548 										  const std::string&								desc,
1549 										  typename FunctionProgramsSimple1<Arg0>::Function	initPrograms,
1550 										  Arg0												arg0)
1551 {
1552 	return new InstanceFactory1<TestInstanceT, Arg0, FunctionProgramsSimple1<Arg0> >(
1553 		testCtx, tcu::NODETYPE_SELF_VALIDATE, name, desc, FunctionProgramsSimple1<Arg0>(initPrograms), arg0);
1554 }
1555 
populateTestGroup(tcu::TestCaseGroup * parentGroup)1556 void populateTestGroup (tcu::TestCaseGroup* parentGroup)
1557 {
1558 	const struct
1559 	{
1560 		std::string		name;
1561 		TestFlags		flags;
1562 	} groups[] =
1563 	{
1564 		{ "sparse_binding",										0u,													},
1565 		{ "sparse_binding_aliased",								TEST_FLAG_ALIASED,									},
1566 		{ "sparse_residency",									TEST_FLAG_RESIDENCY,								},
1567 		{ "sparse_residency_aliased",							TEST_FLAG_RESIDENCY | TEST_FLAG_ALIASED,			},
1568 		{ "sparse_residency_non_resident_strict",				TEST_FLAG_RESIDENCY | TEST_FLAG_NON_RESIDENT_STRICT,},
1569 	};
1570 
1571 	const int numGroupsIncludingNonResidentStrict	= DE_LENGTH_OF_ARRAY(groups);
1572 	const int numGroupsDefaultList					= numGroupsIncludingNonResidentStrict - 1;
1573 	std::string devGroupPrefix						= "device_group_";
1574 
1575 	// Transfer
1576 	{
1577 		MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "transfer", ""));
1578 		{
1579 			MovePtr<tcu::TestCaseGroup> subGroup(new tcu::TestCaseGroup(parentGroup->getTestContext(), "sparse_binding", ""));
1580 			addBufferSparseBindingTests(subGroup.get(), false);
1581 			group->addChild(subGroup.release());
1582 
1583 			MovePtr<tcu::TestCaseGroup> subGroupDeviceGroups(new tcu::TestCaseGroup(parentGroup->getTestContext(), "device_group_sparse_binding", ""));
1584 			addBufferSparseBindingTests(subGroupDeviceGroups.get(), true);
1585 			group->addChild(subGroupDeviceGroups.release());
1586 		}
1587 		parentGroup->addChild(group.release());
1588 	}
1589 
1590 	// SSBO
1591 	{
1592 		MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "ssbo", ""));
1593 		{
1594 			MovePtr<tcu::TestCaseGroup> subGroup(new tcu::TestCaseGroup(parentGroup->getTestContext(), "sparse_binding_aliased", ""));
1595 			addBufferSparseMemoryAliasingTests(subGroup.get(), false);
1596 			group->addChild(subGroup.release());
1597 
1598 			MovePtr<tcu::TestCaseGroup> subGroupDeviceGroups(new tcu::TestCaseGroup(parentGroup->getTestContext(), "device_group_sparse_binding_aliased", ""));
1599 			addBufferSparseMemoryAliasingTests(subGroupDeviceGroups.get(), true);
1600 			group->addChild(subGroupDeviceGroups.release());
1601 		}
1602 		{
1603 			MovePtr<tcu::TestCaseGroup> subGroup(new tcu::TestCaseGroup(parentGroup->getTestContext(), "sparse_residency", ""));
1604 			addBufferSparseResidencyTests(subGroup.get(), false);
1605 			group->addChild(subGroup.release());
1606 
1607 			MovePtr<tcu::TestCaseGroup> subGroupDeviceGroups(new tcu::TestCaseGroup(parentGroup->getTestContext(), "device_group_sparse_residency", ""));
1608 			addBufferSparseResidencyTests(subGroupDeviceGroups.get(), true);
1609 			group->addChild(subGroupDeviceGroups.release());
1610 		}
1611 		parentGroup->addChild(group.release());
1612 	}
1613 
1614 	// UBO
1615 	{
1616 		MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "ubo", ""));
1617 
1618 		for (int groupNdx = 0u; groupNdx < numGroupsIncludingNonResidentStrict; ++groupNdx)
1619 		{
1620 			group->addChild(createTestInstanceWithPrograms<UBOTestInstance>(group->getTestContext(), groups[groupNdx].name.c_str(), "", initProgramsDrawWithUBO, groups[groupNdx].flags));
1621 		}
1622 		for (int groupNdx = 0u; groupNdx < numGroupsIncludingNonResidentStrict; ++groupNdx)
1623 		{
1624 			group->addChild(createTestInstanceWithPrograms<UBOTestInstance>(group->getTestContext(), (devGroupPrefix + groups[groupNdx].name).c_str(), "", initProgramsDrawWithUBO, groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
1625 		}
1626 		parentGroup->addChild(group.release());
1627 	}
1628 
1629 	// Vertex buffer
1630 	{
1631 		MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "vertex_buffer", ""));
1632 
1633 		for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
1634 		{
1635 			group->addChild(createTestInstanceWithPrograms<VertexBufferTestInstance>(group->getTestContext(), groups[groupNdx].name.c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags));
1636 		}
1637 		for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
1638 		{
1639 			group->addChild(createTestInstanceWithPrograms<VertexBufferTestInstance>(group->getTestContext(), (devGroupPrefix + groups[groupNdx].name).c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
1640 		}
1641 
1642 		parentGroup->addChild(group.release());
1643 	}
1644 
1645 	// Index buffer
1646 	{
1647 		MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "index_buffer", ""));
1648 
1649 		for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
1650 		{
1651 			group->addChild(createTestInstanceWithPrograms<IndexBufferTestInstance>(group->getTestContext(), groups[groupNdx].name.c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags));
1652 		}
1653 		for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
1654 		{
1655 			group->addChild(createTestInstanceWithPrograms<IndexBufferTestInstance>(group->getTestContext(), (devGroupPrefix + groups[groupNdx].name).c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
1656 		}
1657 
1658 		parentGroup->addChild(group.release());
1659 	}
1660 
1661 	// Indirect buffer
1662 	{
1663 		MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "indirect_buffer", ""));
1664 
1665 		for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
1666 		{
1667 			group->addChild(createTestInstanceWithPrograms<IndirectBufferTestInstance>(group->getTestContext(), groups[groupNdx].name.c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags));
1668 		}
1669 		for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
1670 		{
1671 			group->addChild(createTestInstanceWithPrograms<IndirectBufferTestInstance>(group->getTestContext(), (devGroupPrefix +  groups[groupNdx].name).c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
1672 		}
1673 
1674 		parentGroup->addChild(group.release());
1675 	}
1676 }
1677 
1678 } // anonymous ns
1679 
createSparseBufferTests(tcu::TestContext & testCtx)1680 tcu::TestCaseGroup* createSparseBufferTests (tcu::TestContext& testCtx)
1681 {
1682 	return createTestGroup(testCtx, "buffer", "Sparse buffer usage tests", populateTestGroup);
1683 }
1684 
1685 } // sparse
1686 } // vkt
1687