• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *	  http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Ray Tracing Build Large Shader Set tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktRayTracingBuildLargeTests.hpp"
25 
26 #include "vkDefs.hpp"
27 
28 #include "vktTestCase.hpp"
29 #include "vkCmdUtil.hpp"
30 #include "vkObjUtil.hpp"
31 #include "vkBuilderUtil.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkBufferWithMemory.hpp"
34 #include "vkImageWithMemory.hpp"
35 #include "vkTypeUtil.hpp"
36 
37 #include "vkRayTracingUtil.hpp"
38 
39 #include "deClock.h"
40 
41 #include <limits>
42 
43 namespace vkt
44 {
45 namespace RayTracing
46 {
47 namespace
48 {
49 using namespace vk;
50 using namespace std;
51 
52 static const VkFlags	ALL_RAY_TRACING_STAGES	= VK_SHADER_STAGE_RAYGEN_BIT_KHR
53 												| VK_SHADER_STAGE_ANY_HIT_BIT_KHR
54 												| VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR
55 												| VK_SHADER_STAGE_MISS_BIT_KHR
56 												| VK_SHADER_STAGE_INTERSECTION_BIT_KHR
57 												| VK_SHADER_STAGE_CALLABLE_BIT_KHR;
58 
59 struct CaseDef
60 {
61 	deUint32							width;
62 	deUint32							height;
63 	deUint32							squaresGroupCount;
64 	deUint32							geometriesGroupCount;
65 	deUint32							instancesGroupCount;
66 	bool								deferredOperation;
67 	VkAccelerationStructureBuildTypeKHR	buildType;
68 	deUint32							workerThreadsCount;
69 };
70 
getShaderGroupSize(const InstanceInterface & vki,const VkPhysicalDevice physicalDevice)71 deUint32 getShaderGroupSize (const InstanceInterface&	vki,
72 							 const VkPhysicalDevice		physicalDevice)
73 {
74 	de::MovePtr<RayTracingProperties>	rayTracingPropertiesKHR;
75 
76 	rayTracingPropertiesKHR	= makeRayTracingProperties(vki, physicalDevice);
77 
78 	return rayTracingPropertiesKHR->getShaderGroupHandleSize();
79 }
80 
getShaderGroupBaseAlignment(const InstanceInterface & vki,const VkPhysicalDevice physicalDevice)81 deUint32 getShaderGroupBaseAlignment (const InstanceInterface&	vki,
82 									  const VkPhysicalDevice	physicalDevice)
83 {
84 	de::MovePtr<RayTracingProperties>	rayTracingPropertiesKHR;
85 
86 	rayTracingPropertiesKHR = makeRayTracingProperties(vki, physicalDevice);
87 
88 	return rayTracingPropertiesKHR->getShaderGroupBaseAlignment();
89 }
90 
makePipeline(const DeviceInterface & vkd,const VkDevice device,vk::BinaryCollection & collection,de::MovePtr<RayTracingPipeline> & rayTracingPipeline,VkPipelineLayout pipelineLayout,const deUint32 groupCount,const bool deferredOperation,const deUint32 threadCount)91 Move<VkPipeline> makePipeline (const DeviceInterface&			vkd,
92 							   const VkDevice					device,
93 							   vk::BinaryCollection&			collection,
94 							   de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
95 							   VkPipelineLayout					pipelineLayout,
96 							   const deUint32					groupCount,
97 							   const bool						deferredOperation,
98 							   const deUint32					threadCount)
99 {
100 	Move<VkShaderModule>	raygenShader	= createShaderModule(vkd, device, collection.get("rgen"), 0);
101 
102 	rayTracingPipeline->setDeferredOperation(deferredOperation, threadCount);
103 	rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR, raygenShader, 0);
104 
105 	for (deUint32 groupNdx = 0; groupNdx < groupCount; ++groupNdx)
106 	{
107 		const std::string		shaderName	= "call" + de::toString(groupNdx);
108 		Move<VkShaderModule>	callShader	= createShaderModule(vkd, device, collection.get(shaderName), 0);
109 
110 		rayTracingPipeline->addShader(VK_SHADER_STAGE_CALLABLE_BIT_KHR,	callShader, 1 + groupNdx);
111 	}
112 
113 	Move<VkPipeline> pipeline = rayTracingPipeline->createPipeline(vkd, device, pipelineLayout);
114 
115 	return pipeline;
116 }
117 
makeImageCreateInfo(deUint32 width,deUint32 height,VkFormat format)118 VkImageCreateInfo makeImageCreateInfo (deUint32 width, deUint32 height, VkFormat format)
119 {
120 	const VkImageUsageFlags	usage			= VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
121 	const VkImageCreateInfo	imageCreateInfo	=
122 	{
123 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	// VkStructureType			sType;
124 		DE_NULL,								// const void*				pNext;
125 		(VkImageCreateFlags)0u,					// VkImageCreateFlags		flags;
126 		VK_IMAGE_TYPE_2D,						// VkImageType				imageType;
127 		format,									// VkFormat					format;
128 		makeExtent3D(width, height, 1u),		// VkExtent3D				extent;
129 		1u,										// deUint32					mipLevels;
130 		1u,										// deUint32					arrayLayers;
131 		VK_SAMPLE_COUNT_1_BIT,					// VkSampleCountFlagBits	samples;
132 		VK_IMAGE_TILING_OPTIMAL,				// VkImageTiling			tiling;
133 		usage,									// VkImageUsageFlags		usage;
134 		VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode			sharingMode;
135 		0u,										// deUint32					queueFamilyIndexCount;
136 		DE_NULL,								// const deUint32*			pQueueFamilyIndices;
137 		VK_IMAGE_LAYOUT_UNDEFINED				// VkImageLayout			initialLayout;
138 	};
139 
140 	return imageCreateInfo;
141 }
142 
143 class RayTracingBuildLargeTestInstance : public TestInstance
144 {
145 public:
146 																RayTracingBuildLargeTestInstance	(Context& context, const CaseDef& data);
147 																~RayTracingBuildLargeTestInstance	(void);
148 	tcu::TestStatus												iterate								(void);
149 
150 protected:
151 	deUint32													iterateNoWorkers					(void);
152 	deUint32													iterateWithWorkers					(void);
153 	void														checkSupportInInstance				(void) const;
154 	de::MovePtr<BufferWithMemory>								runTest								(const deUint32										threadCount);
155 	deUint32													validateBuffer						(de::MovePtr<BufferWithMemory>						buffer);
156 	de::SharedPtr<TopLevelAccelerationStructure>				initTopAccelerationStructure		(VkCommandBuffer									cmdBuffer,
157 																									 de::SharedPtr<BottomLevelAccelerationStructure>&	bottomLevelAccelerationStructure);
158 	de::SharedPtr<BottomLevelAccelerationStructure>				initBottomAccelerationStructure		(VkCommandBuffer	cmdBuffer);
159 
160 private:
161 	CaseDef														m_data;
162 };
163 
RayTracingBuildLargeTestInstance(Context & context,const CaseDef & data)164 RayTracingBuildLargeTestInstance::RayTracingBuildLargeTestInstance (Context& context, const CaseDef& data)
165 	: vkt::TestInstance		(context)
166 	, m_data				(data)
167 {
168 }
169 
~RayTracingBuildLargeTestInstance(void)170 RayTracingBuildLargeTestInstance::~RayTracingBuildLargeTestInstance (void)
171 {
172 }
173 
174 class RayTracingTestCase : public TestCase
175 {
176 	public:
177 							RayTracingTestCase	(tcu::TestContext& context, const char* name, const char* desc, const CaseDef data);
178 							~RayTracingTestCase	(void);
179 
180 	virtual	void			initPrograms		(SourceCollections& programCollection) const;
181 	virtual TestInstance*	createInstance		(Context& context) const;
182 	virtual void			checkSupport		(Context& context) const;
183 
184 private:
185 	std::string				generateDummyWork	(const deUint32 shaderNdx) const;
186 	CaseDef					m_data;
187 };
188 
RayTracingTestCase(tcu::TestContext & context,const char * name,const char * desc,const CaseDef data)189 RayTracingTestCase::RayTracingTestCase (tcu::TestContext& context, const char* name, const char* desc, const CaseDef data)
190 	: vkt::TestCase	(context, name, desc)
191 	, m_data		(data)
192 {
193 	DE_ASSERT((m_data.width * m_data.height) == (m_data.squaresGroupCount * m_data.geometriesGroupCount * m_data.instancesGroupCount));
194 }
195 
~RayTracingTestCase(void)196 RayTracingTestCase::~RayTracingTestCase	(void)
197 {
198 }
199 
checkSupport(Context & context) const200 void RayTracingTestCase::checkSupport(Context& context) const
201 {
202 	context.requireDeviceFunctionality("VK_KHR_acceleration_structure");
203 	context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
204 
205 	const VkPhysicalDeviceRayTracingPipelineFeaturesKHR&	rayTracingPipelineFeaturesKHR		= context.getRayTracingPipelineFeatures();
206 	if (rayTracingPipelineFeaturesKHR.rayTracingPipeline == DE_FALSE )
207 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceRayTracingPipelineFeaturesKHR.rayTracingPipeline");
208 
209 	const VkPhysicalDeviceAccelerationStructureFeaturesKHR&	accelerationStructureFeaturesKHR	= context.getAccelerationStructureFeatures();
210 	if (accelerationStructureFeaturesKHR.accelerationStructure == DE_FALSE)
211 		TCU_THROW(TestError, "VK_KHR_ray_tracing_pipeline requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructure");
212 
213 	if (m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR && accelerationStructureFeaturesKHR.accelerationStructureHostCommands == DE_FALSE)
214 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructureHostCommands");
215 
216 	if (m_data.deferredOperation)
217 		context.requireDeviceFunctionality("VK_KHR_deferred_host_operations");
218 }
219 
generateDummyWork(const deUint32 shaderNdx) const220 std::string RayTracingTestCase::generateDummyWork (const deUint32 shaderNdx) const
221 {
222 	std::string	result;
223 
224 	for (deUint32 n = 0; n < shaderNdx % 256; ++n)
225 	{
226 		result += "  color.b = color.b + 2 * " + de::toString(n) + ";\n";
227 		result += "  color.g = color.g + 3 * " + de::toString(n) + ";\n";
228 		result += "  color.b = color.b ^ color.g;\n";
229 		result += "  color.b = color.b % 223;\n";
230 		result += "  color.g = color.g % 227;\n";
231 		result += "  color.g = color.g ^ color.b;\n";
232 	}
233 
234 	return result;
235 }
236 
initPrograms(SourceCollections & programCollection) const237 void RayTracingTestCase::initPrograms (SourceCollections& programCollection) const
238 {
239 	const vk::ShaderBuildOptions	buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
240 	{
241 		std::stringstream css;
242 		css <<
243 			"#version 460 core\n"
244 			"#extension GL_EXT_ray_tracing : require\n"
245 			"layout(location = 0) callableDataEXT float dummy;"
246 			"layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n"
247 			"\n"
248 			"void main()\n"
249 			"{\n"
250 			"  uint n = " << m_data.width << " * gl_LaunchIDEXT.y + gl_LaunchIDEXT.x;\n"
251 			"  executeCallableEXT(n, 0);\n"
252 			"}\n";
253 
254 		programCollection.glslSources.add("rgen") << glu::RaygenSource(updateRayTracingGLSL(css.str())) << buildOptions;
255 	}
256 
257 	for (deUint32 y = 0; y < m_data.height; ++y)
258 	for (deUint32 x = 0; x < m_data.width; ++x)
259 	{
260 		const deUint32		shaderNdx	= m_data.width * y + x;
261 		const bool			dummyWork	= (shaderNdx % 43 == 0);
262 		std::stringstream	css;
263 		css <<
264 			"#version 460 core\n"
265 			"#extension GL_EXT_ray_tracing : require\n"
266 			"layout(location = 0) callableDataInEXT float dummy;\n"
267 			"layout(r32ui, set = 0, binding = 0) uniform uimage2D image0_0;\n"
268 			"void main()\n"
269 			"{\n"
270 			"  uint r = (" << m_data.width << " * " << y / 3 << " + " << x << ") % 199;\n"
271 			"  uvec4 color = uvec4(r,0,0,1);\n"
272 			<< (dummyWork ? generateDummyWork(shaderNdx) : "") <<
273 			"  imageStore(image0_0, ivec2(gl_LaunchIDEXT.xy), color);\n"
274 			"}\n";
275 
276 		programCollection.glslSources.add("call" + de::toString(shaderNdx)) << glu::CallableSource(updateRayTracingGLSL(css.str())) << buildOptions;
277 	}
278 }
279 
createInstance(Context & context) const280 TestInstance* RayTracingTestCase::createInstance (Context& context) const
281 {
282 	return new RayTracingBuildLargeTestInstance(context, m_data);
283 }
284 
initTopAccelerationStructure(VkCommandBuffer cmdBuffer,de::SharedPtr<BottomLevelAccelerationStructure> & bottomLevelAccelerationStructure)285 de::SharedPtr<TopLevelAccelerationStructure> RayTracingBuildLargeTestInstance::initTopAccelerationStructure (VkCommandBuffer									cmdBuffer,
286 																											 de::SharedPtr<BottomLevelAccelerationStructure>&	bottomLevelAccelerationStructure)
287 {
288 	const DeviceInterface&						vkd			= m_context.getDeviceInterface();
289 	const VkDevice								device		= m_context.getDevice();
290 	Allocator&									allocator	= m_context.getDefaultAllocator();
291 	de::MovePtr<TopLevelAccelerationStructure>	result		= makeTopLevelAccelerationStructure();
292 
293 	result->setInstanceCount(1);
294 	result->setBuildType(m_data.buildType);
295 	result->setDeferredOperation(m_data.deferredOperation);
296 	result->addInstance(bottomLevelAccelerationStructure);
297 
298 	result->createAndBuild(vkd, device, cmdBuffer, allocator);
299 
300 	return de::SharedPtr<TopLevelAccelerationStructure>(result.release());
301 }
302 
initBottomAccelerationStructure(VkCommandBuffer cmdBuffer)303 de::SharedPtr<BottomLevelAccelerationStructure> RayTracingBuildLargeTestInstance::initBottomAccelerationStructure (VkCommandBuffer	cmdBuffer)
304 {
305 	const DeviceInterface&							vkd			= m_context.getDeviceInterface();
306 	const VkDevice									device		= m_context.getDevice();
307 	Allocator&										allocator	= m_context.getDefaultAllocator();
308 	tcu::UVec2										startPos	= tcu::UVec2(0u, 0u);
309 	de::MovePtr<BottomLevelAccelerationStructure>	result		= makeBottomLevelAccelerationStructure();
310 
311 	result->setBuildType(m_data.buildType);
312 	result->setDeferredOperation(m_data.deferredOperation);
313 	result->setGeometryCount(m_data.geometriesGroupCount);
314 
315 	for (size_t geometryNdx = 0; geometryNdx < m_data.geometriesGroupCount; ++geometryNdx)
316 	{
317 		std::vector<tcu::Vec3>	geometryData;
318 
319 		geometryData.reserve(m_data.squaresGroupCount * 3u);
320 
321 		for (size_t squareNdx = 0; squareNdx < m_data.squaresGroupCount; ++squareNdx)
322 		{
323 			const deUint32	n	= m_data.width * startPos.y() + startPos.x();
324 			const deUint32	m	= (13 * (n + 1)) % (m_data.width * m_data.height);
325 			const float		x0	= float(startPos.x() + 0) / float(m_data.width);
326 			const float		y0	= float(startPos.y() + 0) / float(m_data.height);
327 			const float		x1	= float(startPos.x() + 1) / float(m_data.width);
328 			const float		y1	= float(startPos.y() + 1) / float(m_data.height);
329 			const float		xm	= (x0 + x1) / 2.0f;
330 			const float		ym	= (y0 + y1) / 2.0f;
331 
332 			geometryData.push_back(tcu::Vec3(x0, y0, -1.0f));
333 			geometryData.push_back(tcu::Vec3(xm, y1, -1.0f));
334 			geometryData.push_back(tcu::Vec3(x1, ym, -1.0f));
335 
336 			startPos.y() = m / m_data.width;
337 			startPos.x() = m % m_data.width;
338 		}
339 
340 		result->addGeometry(geometryData, true);
341 	}
342 
343 	result->createAndBuild(vkd, device, cmdBuffer, allocator);
344 
345 	return de::SharedPtr<BottomLevelAccelerationStructure>(result.release());
346 }
347 
runTest(const deUint32 threadCount)348 de::MovePtr<BufferWithMemory> RayTracingBuildLargeTestInstance::runTest (const deUint32	threadCount)
349 {
350 	const InstanceInterface&			vki									= m_context.getInstanceInterface();
351 	const DeviceInterface&				vkd									= m_context.getDeviceInterface();
352 	const VkDevice						device								= m_context.getDevice();
353 	const VkPhysicalDevice				physicalDevice						= m_context.getPhysicalDevice();
354 	const deUint32						queueFamilyIndex					= m_context.getUniversalQueueFamilyIndex();
355 	const VkQueue						queue								= m_context.getUniversalQueue();
356 	Allocator&							allocator							= m_context.getDefaultAllocator();
357 	const VkFormat						format								= VK_FORMAT_R32_UINT;
358 	const deUint32						pixelCount							= m_data.width * m_data.height;
359 	const deUint32						callableShaderCount					= m_data.width * m_data.height;
360 	const deUint32						shaderGroupHandleSize				= getShaderGroupSize(vki, physicalDevice);
361 	const deUint32						shaderGroupBaseAlignment			= getShaderGroupBaseAlignment(vki, physicalDevice);
362 
363 	const Move<VkDescriptorSetLayout>	descriptorSetLayout					= DescriptorSetLayoutBuilder()
364 																					.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
365 																					.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
366 																					.build(vkd, device);
367 	const Move<VkDescriptorPool>		descriptorPool						= DescriptorPoolBuilder()
368 																					.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
369 																					.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
370 																					.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
371 	const Move<VkDescriptorSet>			descriptorSet						= makeDescriptorSet(vkd, device, *descriptorPool, *descriptorSetLayout);
372 	const Move<VkPipelineLayout>		pipelineLayout						= makePipelineLayout(vkd, device, descriptorSetLayout.get());
373 	const Move<VkCommandPool>			cmdPool								= createCommandPool(vkd, device, 0, queueFamilyIndex);
374 	const Move<VkCommandBuffer>			cmdBuffer							= allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
375 
376 	de::MovePtr<RayTracingPipeline>		rayTracingPipeline					= de::newMovePtr<RayTracingPipeline>();
377 	const Move<VkPipeline>				pipeline							= makePipeline(vkd, device, m_context.getBinaryCollection(), rayTracingPipeline, *pipelineLayout, callableShaderCount, m_data.deferredOperation, threadCount);
378 	const de::MovePtr<BufferWithMemory>	raygenShaderBindingTable			= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1u);
379 	const de::MovePtr<BufferWithMemory>	callableShaderBindingTable			= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1u, callableShaderCount);
380 	const VkStridedDeviceAddressRegionKHR	raygenShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, raygenShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
381 	const VkStridedDeviceAddressRegionKHR	missShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
382 	const VkStridedDeviceAddressRegionKHR	hitShaderBindingTableRegion			= makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
383 	const VkStridedDeviceAddressRegionKHR	callableShaderBindingTableRegion	= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, callableShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize * callableShaderCount);
384 
385 	const VkImageCreateInfo				imageCreateInfo						= makeImageCreateInfo(m_data.width, m_data.height, format);
386 	const VkImageSubresourceRange		imageSubresourceRange				= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
387 	const de::MovePtr<ImageWithMemory>	image								= de::MovePtr<ImageWithMemory>(new ImageWithMemory(vkd, device, allocator, imageCreateInfo, MemoryRequirement::Any));
388 	const Move<VkImageView>				imageView							= makeImageView(vkd, device, **image, VK_IMAGE_VIEW_TYPE_2D, format, imageSubresourceRange);
389 
390 	const VkBufferCreateInfo			bufferCreateInfo					= makeBufferCreateInfo(pixelCount*sizeof(deUint32), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
391 	const VkImageSubresourceLayers		bufferImageSubresourceLayers		= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
392 	const VkBufferImageCopy				bufferImageRegion					= makeBufferImageCopy(makeExtent3D(m_data.width, m_data.height, 1u), bufferImageSubresourceLayers);
393 	de::MovePtr<BufferWithMemory>		buffer								= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible));
394 
395 	const VkDescriptorImageInfo			descriptorImageInfo					= makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
396 
397 	const VkImageMemoryBarrier			preImageBarrier						= makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT,
398 																				VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
399 																				**image, imageSubresourceRange);
400 	const VkImageMemoryBarrier			postImageBarrier					= makeImageMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
401 																				VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL,
402 																				**image, imageSubresourceRange);
403 	const VkMemoryBarrier				postTraceMemoryBarrier				= makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
404 	const VkMemoryBarrier				postCopyMemoryBarrier				= makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
405 	const VkClearValue					clearValue							= makeClearValueColorU32(5u, 5u, 5u, 255u);
406 
407 	de::SharedPtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure;
408 	de::SharedPtr<TopLevelAccelerationStructure>	topLevelAccelerationStructure;
409 
410 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
411 	{
412 		cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &preImageBarrier);
413 		vkd.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1, &imageSubresourceRange);
414 		cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, &postImageBarrier);
415 
416 		bottomLevelAccelerationStructure	= initBottomAccelerationStructure(*cmdBuffer);
417 		topLevelAccelerationStructure		= initTopAccelerationStructure(*cmdBuffer, bottomLevelAccelerationStructure);
418 
419 		const TopLevelAccelerationStructure*			topLevelAccelerationStructurePtr		= topLevelAccelerationStructure.get();
420 		VkWriteDescriptorSetAccelerationStructureKHR	accelerationStructureWriteDescriptorSet	=
421 		{
422 			VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	//  VkStructureType						sType;
423 			DE_NULL,															//  const void*							pNext;
424 			1u,																	//  deUint32							accelerationStructureCount;
425 			topLevelAccelerationStructurePtr->getPtr(),							//  const VkAccelerationStructureKHR*	pAccelerationStructures;
426 		};
427 
428 		DescriptorSetUpdateBuilder()
429 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
430 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
431 			.update(vkd, device);
432 
433 		vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL);
434 
435 		vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
436 
437 		cmdTraceRays(vkd,
438 			*cmdBuffer,
439 			&raygenShaderBindingTableRegion,
440 			&missShaderBindingTableRegion,
441 			&hitShaderBindingTableRegion,
442 			&callableShaderBindingTableRegion,
443 			m_data.width, m_data.height, 1);
444 
445 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_TRANSFER_BIT, &postTraceMemoryBarrier);
446 
447 		vkd.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **buffer, 1u, &bufferImageRegion);
448 
449 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &postCopyMemoryBarrier);
450 	}
451 	endCommandBuffer(vkd, *cmdBuffer);
452 
453 	submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
454 
455 	invalidateMappedMemoryRange(vkd, device, buffer->getAllocation().getMemory(), buffer->getAllocation().getOffset(), pixelCount * sizeof(deUint32));
456 
457 	return buffer;
458 }
459 
checkSupportInInstance(void) const460 void RayTracingBuildLargeTestInstance::checkSupportInInstance (void) const
461 {
462 	const InstanceInterface&				vki						= m_context.getInstanceInterface();
463 	const VkPhysicalDevice					physicalDevice			= m_context.getPhysicalDevice();
464 	const vk::VkPhysicalDeviceProperties&	properties				= m_context.getDeviceProperties();
465 	const deUint32							requiredAllocations		= 8u
466 																	+ TopLevelAccelerationStructure::getRequiredAllocationCount()
467 																	+ m_data.instancesGroupCount * BottomLevelAccelerationStructure::getRequiredAllocationCount();
468 	de::MovePtr<RayTracingProperties>		rayTracingProperties	= makeRayTracingProperties(vki, physicalDevice);
469 
470 	if (rayTracingProperties->getMaxPrimitiveCount() < m_data.squaresGroupCount)
471 		TCU_THROW(NotSupportedError, "Triangles required more than supported");
472 
473 	if (rayTracingProperties->getMaxGeometryCount() < m_data.geometriesGroupCount)
474 		TCU_THROW(NotSupportedError, "Geometries required more than supported");
475 
476 	if (rayTracingProperties->getMaxInstanceCount() < m_data.instancesGroupCount)
477 		TCU_THROW(NotSupportedError, "Instances required more than supported");
478 
479 	if (properties.limits.maxMemoryAllocationCount < requiredAllocations)
480 		TCU_THROW(NotSupportedError, "Test requires more allocations allowed");
481 }
482 
validateBuffer(de::MovePtr<BufferWithMemory> buffer)483 deUint32 RayTracingBuildLargeTestInstance::validateBuffer (de::MovePtr<BufferWithMemory> buffer)
484 {
485 	const deUint32*	bufferPtr	= (deUint32*)buffer->getAllocation().getHostPtr();
486 	deUint32		failures	= 0;
487 	deUint32		pos			= 0;
488 
489 	for (deUint32 y = 0; y < m_data.height; ++y)
490 	for (deUint32 x = 0; x < m_data.width; ++x)
491 	{
492 		const deUint32	expectedValue	= (m_data.width * (y / 3) + x) % 199;
493 
494 		if (bufferPtr[pos] != expectedValue)
495 			failures++;
496 
497 		++pos;
498 	}
499 
500 	return failures;
501 }
502 
iterateNoWorkers(void)503 deUint32 RayTracingBuildLargeTestInstance::iterateNoWorkers (void)
504 {
505 	de::MovePtr<BufferWithMemory>	buffer		= runTest(0);
506 	const deUint32					failures	= validateBuffer(buffer);
507 
508 	return failures;
509 }
510 
iterateWithWorkers(void)511 deUint32 RayTracingBuildLargeTestInstance::iterateWithWorkers (void)
512 {
513 	const deUint64					singleThreadTimeStart	= deGetMicroseconds();
514 	de::MovePtr<BufferWithMemory>	singleThreadBuffer		= runTest(0);
515 	const deUint32					singleThreadFailures	= validateBuffer(singleThreadBuffer);
516 	const deUint64					singleThreadTime		= deGetMicroseconds() - singleThreadTimeStart;
517 
518 	deUint64						multiThreadTimeStart	= deGetMicroseconds();
519 	de::MovePtr<BufferWithMemory>	multiThreadBuffer		= runTest(m_data.workerThreadsCount);
520 	const deUint32					multiThreadFailures		= validateBuffer(multiThreadBuffer);
521 	deUint64						multiThreadTime			= deGetMicroseconds() - multiThreadTimeStart;
522 	const deUint64					multiThreadTimeOut		= 10 * singleThreadTime;
523 
524 	const deUint32					failures				= singleThreadFailures + multiThreadFailures;
525 
526 	DE_ASSERT(multiThreadTimeOut > singleThreadTime);
527 
528 	if (multiThreadTime > multiThreadTimeOut)
529 	{
530 		string failMsg	= "Time of multithreaded test execution " + de::toString(multiThreadTime) +
531 						  " that is longer than expected execution time " + de::toString(multiThreadTimeOut);
532 
533 		TCU_FAIL(failMsg);
534 	}
535 
536 	return failures;
537 }
538 
iterate(void)539 tcu::TestStatus RayTracingBuildLargeTestInstance::iterate (void)
540 {
541 	checkSupportInInstance();
542 
543 	const deUint32	failures	= m_data.workerThreadsCount == 0
544 								? iterateNoWorkers()
545 								: iterateWithWorkers();
546 
547 	if (failures == 0)
548 		return tcu::TestStatus::pass("Pass");
549 	else
550 		return tcu::TestStatus::fail("failures=" + de::toString(failures));
551 }
552 
553 }	// anonymous
554 
createBuildLargeShaderSetTests(tcu::TestContext & testCtx)555 tcu::TestCaseGroup*	createBuildLargeShaderSetTests (tcu::TestContext& testCtx)
556 {
557 	de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "large_shader_set", "Build large shader set using CPU host threading"));
558 
559 	const deUint32	sizes[]		= { 8, 16, 32, 64 };
560 	const struct
561 	{
562 		const char*									buildTypeName;
563 		bool										deferredOperation;
564 		const VkAccelerationStructureBuildTypeKHR	buildType;
565 	}
566 	buildTypes[] =
567 	{
568 		{  "gpu",		false,	VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR	},
569 		{  "cpu_ht",	true,	VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR	},
570 	};
571 	const deUint32	threads[]	= { 1, 2, 3, 4, 8, std::numeric_limits<deUint32>::max() };
572 
573 	for (size_t buildNdx = 0; buildNdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildNdx)
574 	{
575 		de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(new tcu::TestCaseGroup(testCtx, buildTypes[buildNdx].buildTypeName, ""));
576 
577 		for (size_t sizesNdx = 0; sizesNdx < DE_LENGTH_OF_ARRAY(sizes); ++sizesNdx)
578 		{
579 			const deUint32	largestGroup			= sizes[sizesNdx] * sizes[sizesNdx];
580 			const deUint32	squaresGroupCount		= largestGroup;
581 			const deUint32	geometriesGroupCount	= 1;
582 			const deUint32	instancesGroupCount		= 1;
583 			const CaseDef	caseDef					=
584 			{
585 				sizes[sizesNdx],						//  deUint32							width;
586 				sizes[sizesNdx],						//  deUint32							height;
587 				squaresGroupCount,						//  deUint32							squaresGroupCount;
588 				geometriesGroupCount,					//  deUint32							geometriesGroupCount;
589 				instancesGroupCount,					//  deUint32							instancesGroupCount;
590 				buildTypes[buildNdx].deferredOperation,	//  bool								deferredOperation;
591 				buildTypes[buildNdx].buildType,			//  VkAccelerationStructureBuildTypeKHR	buildType;
592 				0,										//  deUint32							threadsCount;
593 			};
594 			const std::string	testName			= de::toString(largestGroup);
595 
596 			buildTypeGroup->addChild(new RayTracingTestCase(testCtx, testName.c_str(), "", caseDef));
597 		}
598 
599 		group->addChild(buildTypeGroup.release());
600 	}
601 
602 	for (size_t threadsNdx = 0; threadsNdx < DE_LENGTH_OF_ARRAY(threads); ++threadsNdx)
603 	{
604 		for (size_t buildNdx = 0; buildNdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildNdx)
605 		{
606 			if (buildTypes[buildNdx].buildType != VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR)
607 				continue;
608 
609 			const std::string				suffix				= threads[threadsNdx] == std::numeric_limits<deUint32>::max() ? "max" : de::toString(threads[threadsNdx]);
610 			const std::string				buildTypeGroupName	= std::string(buildTypes[buildNdx].buildTypeName) + '_' + suffix;
611 			de::MovePtr<tcu::TestCaseGroup> buildTypeGroup		  (new tcu::TestCaseGroup(testCtx, buildTypeGroupName.c_str(), ""));
612 
613 			for (size_t sizesNdx = 0; sizesNdx < DE_LENGTH_OF_ARRAY(sizes); ++sizesNdx)
614 			{
615 				const deUint32	largestGroup			= sizes[sizesNdx] * sizes[sizesNdx];
616 				const deUint32	squaresGroupCount		= largestGroup;
617 				const deUint32	geometriesGroupCount	= 1;
618 				const deUint32	instancesGroupCount		= 1;
619 				const CaseDef	caseDef					=
620 				{
621 					sizes[sizesNdx],						//  deUint32							width;
622 					sizes[sizesNdx],						//  deUint32							height;
623 					squaresGroupCount,						//  deUint32							squaresGroupCount;
624 					geometriesGroupCount,					//  deUint32							geometriesGroupCount;
625 					instancesGroupCount,					//  deUint32							instancesGroupCount;
626 					buildTypes[buildNdx].deferredOperation,	//  bool								deferredOperation;
627 					buildTypes[buildNdx].buildType,			//  VkAccelerationStructureBuildTypeKHR	buildType;
628 					threads[threadsNdx],					//  deUint32							workerThreadsCount;
629 				};
630 				const std::string	testName			= de::toString(largestGroup);
631 
632 				buildTypeGroup->addChild(new RayTracingTestCase(testCtx, testName.c_str(), "", caseDef));
633 			}
634 
635 			group->addChild(buildTypeGroup.release());
636 		}
637 	}
638 
639 	return group.release();
640 }
641 
642 }	// RayTracing
643 }	// vkt
644