• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file  vktSparseResourcesBufferSparseResidency.cpp
21  * \brief Sparse partially resident buffers tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktSparseResourcesBufferSparseResidency.hpp"
25 #include "vktSparseResourcesTestsUtil.hpp"
26 #include "vktSparseResourcesBase.hpp"
27 #include "vktTestCaseUtil.hpp"
28 
29 #include "vkDefs.hpp"
30 #include "vkRef.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkPlatform.hpp"
33 #include "vkPrograms.hpp"
34 #include "vkRefUtil.hpp"
35 #include "vkMemUtil.hpp"
36 #include "vkQueryUtil.hpp"
37 #include "vkBuilderUtil.hpp"
38 #include "vkTypeUtil.hpp"
39 
40 #include "deStringUtil.hpp"
41 #include "deUniquePtr.hpp"
42 
43 #include <string>
44 #include <vector>
45 
46 using namespace vk;
47 
48 namespace vkt
49 {
50 namespace sparse
51 {
52 namespace
53 {
54 
//! Constants shared between the host code and the generated compute shader.
enum ShaderParameters
{
	//! Size in bytes of one GLSL uint; used to convert the buffer size into a shader loop count.
	SIZE_OF_UINT_IN_SHADER = 4u
};
59 
60 class BufferSparseResidencyCase : public TestCase
61 {
62 public:
63 					BufferSparseResidencyCase	(tcu::TestContext&		testCtx,
64 												 const std::string&		name,
65 												 const std::string&		description,
66 												 const deUint32			bufferSize,
67 												 const glu::GLSLVersion	glslVersion);
68 
69 	void			initPrograms				(SourceCollections&		sourceCollections) const;
70 	TestInstance*	createInstance				(Context&				context) const;
71 
72 private:
73 	const deUint32			m_bufferSize;
74 	const glu::GLSLVersion	m_glslVersion;
75 };
76 
BufferSparseResidencyCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const deUint32 bufferSize,const glu::GLSLVersion glslVersion)77 BufferSparseResidencyCase::BufferSparseResidencyCase (tcu::TestContext&			testCtx,
78 													  const std::string&		name,
79 													  const std::string&		description,
80 													  const deUint32			bufferSize,
81 													  const glu::GLSLVersion	glslVersion)
82 	: TestCase			(testCtx, name, description)
83 	, m_bufferSize		(bufferSize)
84 	, m_glslVersion		(glslVersion)
85 {
86 }
87 
initPrograms(SourceCollections & sourceCollections) const88 void BufferSparseResidencyCase::initPrograms (SourceCollections& sourceCollections) const
89 {
90 	const char* const	versionDecl		= glu::getGLSLVersionDeclaration(m_glslVersion);
91 	const deUint32		iterationsCount = m_bufferSize / SIZE_OF_UINT_IN_SHADER;
92 
93 	std::ostringstream src;
94 
95 	src << versionDecl << "\n"
96 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
97 		<< "layout(set = 0, binding = 0, std430) readonly buffer Input\n"
98 		<< "{\n"
99 		<< "	uint data[];\n"
100 		<< "} sb_in;\n"
101 		<< "\n"
102 		<< "layout(set = 0, binding = 1, std430) writeonly buffer Output\n"
103 		<< "{\n"
104 		<< "	uint result[];\n"
105 		<< "} sb_out;\n"
106 		<< "\n"
107 		<< "void main (void)\n"
108 		<< "{\n"
109 		<< "	for(int i=0; i<" << iterationsCount << "; ++i) \n"
110 		<< "	{\n"
111 		<< "		sb_out.result[i] = sb_in.data[i];"
112 		<< "	}\n"
113 		<< "}\n";
114 
115 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
116 }
117 
118 class BufferSparseResidencyInstance : public SparseResourcesBaseInstance
119 {
120 public:
121 					BufferSparseResidencyInstance	(Context&			context,
122 													 const deUint32		bufferSize);
123 
124 	tcu::TestStatus	iterate							(void);
125 
126 private:
127 	const deUint32	m_bufferSize;
128 };
129 
BufferSparseResidencyInstance(Context & context,const deUint32 bufferSize)130 BufferSparseResidencyInstance::BufferSparseResidencyInstance (Context&			context,
131 														      const deUint32	bufferSize)
132 	: SparseResourcesBaseInstance	(context)
133 	, m_bufferSize					(bufferSize)
134 {
135 }
136 
iterate(void)137 tcu::TestStatus BufferSparseResidencyInstance::iterate (void)
138 {
139 	const InstanceInterface&		 instance					= m_context.getInstanceInterface();
140 	const VkPhysicalDevice			 physicalDevice				= m_context.getPhysicalDevice();
141 	const VkPhysicalDeviceProperties physicalDeviceProperties	= getPhysicalDeviceProperties(instance, physicalDevice);
142 
143 	if (!getPhysicalDeviceFeatures(instance, physicalDevice).sparseResidencyBuffer)
144 		TCU_THROW(NotSupportedError, "Sparse partially resident buffers not supported");
145 
146 	{
147 		// Create logical device supporting both sparse and compute operations
148 		QueueRequirementsVec queueRequirements;
149 		queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
150 		queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));
151 
152 		createDeviceSupportingQueues(queueRequirements);
153 	}
154 
155 	const DeviceInterface&	deviceInterface	= getDeviceInterface();
156 	const Queue&			sparseQueue		= getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
157 	const Queue&			computeQueue	= getQueue(VK_QUEUE_COMPUTE_BIT, 0);
158 
159 	VkBufferCreateInfo bufferCreateInfo =
160 	{
161 		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,	// VkStructureType		sType;
162 		DE_NULL,								// const void*			pNext;
163 		VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
164 		VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT,	// VkBufferCreateFlags	flags;
165 		m_bufferSize,							// VkDeviceSize			size;
166 		VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
167 		VK_BUFFER_USAGE_TRANSFER_SRC_BIT,		// VkBufferUsageFlags	usage;
168 		VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode		sharingMode;
169 		0u,										// deUint32				queueFamilyIndexCount;
170 		DE_NULL									// const deUint32*		pQueueFamilyIndices;
171 	};
172 
173 	const deUint32 queueFamilyIndices[] = { sparseQueue.queueFamilyIndex, computeQueue.queueFamilyIndex };
174 
175 	if (sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex)
176 	{
177 		bufferCreateInfo.sharingMode			= VK_SHARING_MODE_CONCURRENT;
178 		bufferCreateInfo.queueFamilyIndexCount	= 2u;
179 		bufferCreateInfo.pQueueFamilyIndices	= queueFamilyIndices;
180 	}
181 
182 	// Create sparse buffer
183 	const Unique<VkBuffer> sparseBuffer(createBuffer(deviceInterface, getDevice(), &bufferCreateInfo));
184 
185 	// Create sparse buffer memory bind semaphore
186 	const Unique<VkSemaphore> bufferMemoryBindSemaphore(createSemaphore(deviceInterface, getDevice()));
187 
188 	const VkMemoryRequirements bufferMemRequirements = getBufferMemoryRequirements(deviceInterface, getDevice(), *sparseBuffer);
189 
190 	if (bufferMemRequirements.size > physicalDeviceProperties.limits.sparseAddressSpaceSize)
191 		TCU_THROW(NotSupportedError, "Required memory size for sparse resources exceeds device limits");
192 
193 	DE_ASSERT((bufferMemRequirements.size % bufferMemRequirements.alignment) == 0);
194 
195 	const deUint32				numSparseSlots = static_cast<deUint32>(bufferMemRequirements.size / bufferMemRequirements.alignment);
196 	std::vector<DeviceMemorySp>	deviceMemUniquePtrVec;
197 
198 	{
199 		std::vector<VkSparseMemoryBind>		sparseMemoryBinds;
200 		const deUint32						memoryType		= findMatchingMemoryType(instance, physicalDevice, bufferMemRequirements, MemoryRequirement::Any);
201 
202 		if (memoryType == NO_MATCH_FOUND)
203 			return tcu::TestStatus::fail("No matching memory type found");
204 
205 		for (deUint32 sparseBindNdx = 0; sparseBindNdx < numSparseSlots; sparseBindNdx += 2)
206 		{
207 			const VkSparseMemoryBind sparseMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(), bufferMemRequirements.alignment, memoryType, bufferMemRequirements.alignment * sparseBindNdx);
208 
209 			deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(sparseMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
210 
211 			sparseMemoryBinds.push_back(sparseMemoryBind);
212 		}
213 
214 		const VkSparseBufferMemoryBindInfo sparseBufferBindInfo = makeSparseBufferMemoryBindInfo(*sparseBuffer, static_cast<deUint32>(sparseMemoryBinds.size()), &sparseMemoryBinds[0]);
215 
216 		const VkBindSparseInfo bindSparseInfo =
217 		{
218 			VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,			//VkStructureType							sType;
219 			DE_NULL,									//const void*								pNext;
220 			0u,											//deUint32									waitSemaphoreCount;
221 			DE_NULL,									//const VkSemaphore*						pWaitSemaphores;
222 			1u,											//deUint32									bufferBindCount;
223 			&sparseBufferBindInfo,						//const VkSparseBufferMemoryBindInfo*		pBufferBinds;
224 			0u,											//deUint32									imageOpaqueBindCount;
225 			DE_NULL,									//const VkSparseImageOpaqueMemoryBindInfo*	pImageOpaqueBinds;
226 			0u,											//deUint32									imageBindCount;
227 			DE_NULL,									//const VkSparseImageMemoryBindInfo*		pImageBinds;
228 			1u,											//deUint32									signalSemaphoreCount;
229 			&bufferMemoryBindSemaphore.get()			//const VkSemaphore*						pSignalSemaphores;
230 		};
231 
232 		VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
233 	}
234 
235 	// Create input buffer
236 	const VkBufferCreateInfo		inputBufferCreateInfo	= makeBufferCreateInfo(m_bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
237 	const Unique<VkBuffer>			inputBuffer				(createBuffer(deviceInterface, getDevice(), &inputBufferCreateInfo));
238 	const de::UniquePtr<Allocation>	inputBufferAlloc		(bindBuffer(deviceInterface, getDevice(), getAllocator(), *inputBuffer, MemoryRequirement::HostVisible));
239 
240 
241 	std::vector<deUint8> referenceData;
242 	referenceData.resize(m_bufferSize);
243 
244 	for (deUint32 valueNdx = 0; valueNdx < m_bufferSize; ++valueNdx)
245 	{
246 		referenceData[valueNdx] = static_cast<deUint8>((valueNdx % bufferMemRequirements.alignment) + 1u);
247 	}
248 
249 	deMemcpy(inputBufferAlloc->getHostPtr(), &referenceData[0], m_bufferSize);
250 
251 	flushMappedMemoryRange(deviceInterface, getDevice(), inputBufferAlloc->getMemory(), inputBufferAlloc->getOffset(), m_bufferSize);
252 
253 	// Create output buffer
254 	const VkBufferCreateInfo		outputBufferCreateInfo	= makeBufferCreateInfo(m_bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
255 	const Unique<VkBuffer>			outputBuffer			(createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo));
256 	const de::UniquePtr<Allocation>	outputBufferAlloc		(bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible));
257 
258 	// Create command buffer for compute and data transfer oparations
259 	const Unique<VkCommandPool>	  commandPool(makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex));
260 	const Unique<VkCommandBuffer> commandBuffer(allocateCommandBuffer(deviceInterface, getDevice(), *commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
261 
262 	// Start recording compute and transfer commands
263 	beginCommandBuffer(deviceInterface, *commandBuffer);
264 
265 	// Create descriptor set
266 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
267 		DescriptorSetLayoutBuilder()
268 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
269 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
270 		.build(deviceInterface, getDevice()));
271 
272 	// Create compute pipeline
273 	const Unique<VkShaderModule>	shaderModule(createShaderModule(deviceInterface, getDevice(), m_context.getBinaryCollection().get("comp"), DE_NULL));
274 	const Unique<VkPipelineLayout>	pipelineLayout(makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout));
275 	const Unique<VkPipeline>		computePipeline(makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, *shaderModule));
276 
277 	deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
278 
279 	const Unique<VkDescriptorPool> descriptorPool(
280 		DescriptorPoolBuilder()
281 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
282 		.build(deviceInterface, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
283 
284 	const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout));
285 
286 	{
287 		const VkDescriptorBufferInfo inputBufferInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, m_bufferSize);
288 		const VkDescriptorBufferInfo sparseBufferInfo = makeDescriptorBufferInfo(*sparseBuffer, 0ull, m_bufferSize);
289 
290 		DescriptorSetUpdateBuilder()
291 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputBufferInfo)
292 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &sparseBufferInfo)
293 			.update(deviceInterface, getDevice());
294 	}
295 
296 	deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
297 
298 	{
299 		const VkBufferMemoryBarrier inputBufferBarrier
300 			= makeBufferMemoryBarrier(	VK_ACCESS_HOST_WRITE_BIT,
301 										VK_ACCESS_SHADER_READ_BIT,
302 										*inputBuffer,
303 										0ull,
304 										m_bufferSize);
305 
306 		deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 1u, &inputBufferBarrier, 0u, DE_NULL);
307 	}
308 
309 	deviceInterface.cmdDispatch(*commandBuffer, 1u, 1u, 1u);
310 
311 	{
312 		const VkBufferMemoryBarrier sparseBufferBarrier
313 			= makeBufferMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
314 										VK_ACCESS_TRANSFER_READ_BIT,
315 										*sparseBuffer,
316 										0ull,
317 										m_bufferSize);
318 
319 		deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 1u, &sparseBufferBarrier, 0u, DE_NULL);
320 	}
321 
322 	{
323 		const VkBufferCopy bufferCopy = makeBufferCopy(0u, 0u, m_bufferSize);
324 
325 		deviceInterface.cmdCopyBuffer(*commandBuffer, *sparseBuffer, *outputBuffer, 1u, &bufferCopy);
326 	}
327 
328 	{
329 		const VkBufferMemoryBarrier outputBufferBarrier
330 			= makeBufferMemoryBarrier(	VK_ACCESS_TRANSFER_WRITE_BIT,
331 										VK_ACCESS_HOST_READ_BIT,
332 										*outputBuffer,
333 										0ull,
334 										m_bufferSize);
335 
336 		deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferBarrier, 0u, DE_NULL);
337 	}
338 
339 	// End recording compute and transfer commands
340 	endCommandBuffer(deviceInterface, *commandBuffer);
341 
342 	const VkPipelineStageFlags waitStageBits[] = { VK_PIPELINE_STAGE_TRANSFER_BIT };
343 
344 	// Submit transfer commands for execution and wait for completion
345 	submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 1u, &bufferMemoryBindSemaphore.get(), waitStageBits);
346 
347 	// Retrieve data from output buffer to host memory
348 	invalidateMappedMemoryRange(deviceInterface, getDevice(), outputBufferAlloc->getMemory(), outputBufferAlloc->getOffset(), m_bufferSize);
349 
350 	const deUint8* outputData = static_cast<const deUint8*>(outputBufferAlloc->getHostPtr());
351 
352 	// Wait for sparse queue to become idle
353 	deviceInterface.queueWaitIdle(sparseQueue.queueHandle);
354 
355 	// Compare output data with reference data
356 	for (deUint32 sparseBindNdx = 0; sparseBindNdx < numSparseSlots; ++sparseBindNdx)
357 	{
358 		const deUint32 alignment = static_cast<deUint32>(bufferMemRequirements.alignment);
359 		const deUint32 offset	 = alignment * sparseBindNdx;
360 		const deUint32 size		 = sparseBindNdx == (numSparseSlots - 1) ? m_bufferSize % alignment : alignment;
361 
362 		if (sparseBindNdx % 2u == 0u)
363 		{
364 			if (deMemCmp(&referenceData[offset], outputData + offset, size) != 0)
365 				return tcu::TestStatus::fail("Failed");
366 		}
367 		else if (physicalDeviceProperties.sparseProperties.residencyNonResidentStrict)
368 		{
369 			deMemset(&referenceData[offset], 0u, size);
370 
371 			if (deMemCmp(&referenceData[offset], outputData + offset, size) != 0)
372 				return tcu::TestStatus::fail("Failed");
373 		}
374 	}
375 
376 	return tcu::TestStatus::pass("Passed");
377 }
378 
createInstance(Context & context) const379 TestInstance* BufferSparseResidencyCase::createInstance (Context& context) const
380 {
381 	return new BufferSparseResidencyInstance(context, m_bufferSize);
382 }
383 
384 } // anonymous ns
385 
addBufferSparseResidencyTests(tcu::TestCaseGroup * group)386 void addBufferSparseResidencyTests(tcu::TestCaseGroup* group)
387 {
388 	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_10", "", 1 << 10, glu::GLSL_VERSION_440));
389 	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_12", "", 1 << 12, glu::GLSL_VERSION_440));
390 	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_16", "", 1 << 16, glu::GLSL_VERSION_440));
391 	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_17", "", 1 << 17, glu::GLSL_VERSION_440));
392 	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_20", "", 1 << 20, glu::GLSL_VERSION_440));
393 	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_24", "", 1 << 24, glu::GLSL_VERSION_440));
394 }
395 
396 } // sparse
397 } // vkt
398