/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2019 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *	  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Ray Tracing Build tests
 *//*--------------------------------------------------------------------*/
23 
24 #include "vktRayTracingBuildTests.hpp"
25 
26 #include "vkDefs.hpp"
27 
28 #include "vktTestCase.hpp"
29 #include "vkCmdUtil.hpp"
30 #include "vkObjUtil.hpp"
31 #include "vkBuilderUtil.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkBufferWithMemory.hpp"
34 #include "vkImageWithMemory.hpp"
35 #include "vkTypeUtil.hpp"
36 
37 #include "vkRayTracingUtil.hpp"
38 
39 #include "deClock.h"
40 
41 #include <limits>
42 
43 namespace vkt
44 {
45 namespace RayTracing
46 {
47 namespace
48 {
49 using namespace vk;
50 using namespace std;
51 
52 static const VkFlags	ALL_RAY_TRACING_STAGES	= VK_SHADER_STAGE_RAYGEN_BIT_KHR
53 												| VK_SHADER_STAGE_ANY_HIT_BIT_KHR
54 												| VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR
55 												| VK_SHADER_STAGE_MISS_BIT_KHR
56 												| VK_SHADER_STAGE_INTERSECTION_BIT_KHR
57 												| VK_SHADER_STAGE_CALLABLE_BIT_KHR;
58 
// Kind of geometry stored in the bottom level acceleration structures.
enum TestType
{
	TEST_TYPE_TRIANGLES	= 0,	// every bottom level structure holds triangle geometry
	TEST_TYPE_AABBS		= 1,	// every bottom level structure holds non-triangle (AABB) geometry
	TEST_TYPE_MIXED		= 2,	// even-indexed structures hold triangles, odd-indexed ones hold AABBs
};
65 
66 struct CaseDef
67 {
68 	TestType	testType;
69 	deUint32	width;
70 	deUint32	height;
71 	deUint32	squaresGroupCount;
72 	deUint32	geometriesGroupCount;
73 	deUint32	instancesGroupCount;
74 	bool		deferredOperation;
75 	deUint32	workerThreadsCount;
76 };
77 
getShaderGroupSize(const InstanceInterface & vki,const VkPhysicalDevice physicalDevice)78 deUint32 getShaderGroupSize (const InstanceInterface&	vki,
79 							 const VkPhysicalDevice		physicalDevice)
80 {
81 	de::MovePtr<RayTracingProperties>	rayTracingPropertiesKHR;
82 
83 	rayTracingPropertiesKHR	= makeRayTracingProperties(vki, physicalDevice);
84 	return rayTracingPropertiesKHR->getShaderGroupHandleSize();
85 }
86 
getShaderGroupBaseAlignment(const InstanceInterface & vki,const VkPhysicalDevice physicalDevice)87 deUint32 getShaderGroupBaseAlignment (const InstanceInterface&	vki,
88 									  const VkPhysicalDevice	physicalDevice)
89 {
90 	de::MovePtr<RayTracingProperties>	rayTracingPropertiesKHR;
91 
92 	rayTracingPropertiesKHR = makeRayTracingProperties(vki, physicalDevice);
93 	return rayTracingPropertiesKHR->getShaderGroupBaseAlignment();
94 }
95 
makeImageCreateInfo(deUint32 width,deUint32 height,VkFormat format)96 VkImageCreateInfo makeImageCreateInfo (deUint32 width, deUint32 height, VkFormat format)
97 {
98 	const VkImageUsageFlags	usage			= VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
99 	const VkImageCreateInfo	imageCreateInfo	=
100 	{
101 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	// VkStructureType			sType;
102 		DE_NULL,								// const void*				pNext;
103 		(VkImageCreateFlags)0u,					// VkImageCreateFlags		flags;
104 		VK_IMAGE_TYPE_2D,						// VkImageType				imageType;
105 		format,									// VkFormat					format;
106 		makeExtent3D(width, height, 1u),		// VkExtent3D				extent;
107 		1u,										// deUint32					mipLevels;
108 		1u,										// deUint32					arrayLayers;
109 		VK_SAMPLE_COUNT_1_BIT,					// VkSampleCountFlagBits	samples;
110 		VK_IMAGE_TILING_OPTIMAL,				// VkImageTiling			tiling;
111 		usage,									// VkImageUsageFlags		usage;
112 		VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode			sharingMode;
113 		0u,										// deUint32					queueFamilyIndexCount;
114 		DE_NULL,								// const deUint32*			pQueueFamilyIndices;
115 		VK_IMAGE_LAYOUT_UNDEFINED				// VkImageLayout			initialLayout;
116 	};
117 
118 	return imageCreateInfo;
119 }
120 
121 class RayTracingBuildTestInstance : public TestInstance
122 {
123 public:
124 																RayTracingBuildTestInstance			(Context& context, const CaseDef& data);
125 																~RayTracingBuildTestInstance		(void);
126 	tcu::TestStatus												iterate								(void);
127 
128 protected:
129 	deUint32													iterateNoWorkers					(void);
130 	deUint32													iterateWithWorkers					(void);
131 	void														checkSupportInInstance				(void) const;
132 	deUint32													validateBuffer						(de::MovePtr<BufferWithMemory>								buffer);
133 	de::MovePtr<BufferWithMemory>								runTest								(bool														useGpuBuild,
134 																									 deUint32													workerThreadsCount);
135 	de::MovePtr<TopLevelAccelerationStructure>					initTopAccelerationStructure		(VkCommandBuffer											cmdBuffer,
136 																									 bool														useGpuBuild,
137 																									 deUint32													workerThreadsCount,
138 																									 vector<de::SharedPtr<BottomLevelAccelerationStructure> >&	bottomLevelAccelerationStructures);
139 	vector<de::SharedPtr<BottomLevelAccelerationStructure>	>	initBottomAccelerationStructures	(VkCommandBuffer											cmdBuffer,
140 																									 bool														useGpuBuild,
141 																									 deUint32													workerThreadsCount);
142 	de::MovePtr<BottomLevelAccelerationStructure>				initBottomAccelerationStructure		(VkCommandBuffer											cmdBuffer,
143 																									 bool														useGpuBuild,
144 																									 deUint32													workerThreadsCount,
145 																									 tcu::UVec2&												startPos,
146 																									 bool														triangles);
147 
148 private:
149 	CaseDef														m_data;
150 };
151 
RayTracingBuildTestInstance(Context & context,const CaseDef & data)152 RayTracingBuildTestInstance::RayTracingBuildTestInstance (Context& context, const CaseDef& data)
153 	: vkt::TestInstance		(context)
154 	, m_data				(data)
155 {
156 }
157 
~RayTracingBuildTestInstance(void)158 RayTracingBuildTestInstance::~RayTracingBuildTestInstance (void)
159 {
160 }
161 
162 class RayTracingTestCase : public TestCase
163 {
164 	public:
165 							RayTracingTestCase	(tcu::TestContext& context, const char* name, const char* desc, const CaseDef data);
166 							~RayTracingTestCase	(void);
167 
168 	virtual	void			initPrograms		(SourceCollections& programCollection) const;
169 	virtual TestInstance*	createInstance		(Context& context) const;
170 	virtual void			checkSupport		(Context& context) const;
171 
172 private:
173 	CaseDef					m_data;
174 };
175 
RayTracingTestCase(tcu::TestContext & context,const char * name,const char * desc,const CaseDef data)176 RayTracingTestCase::RayTracingTestCase (tcu::TestContext& context, const char* name, const char* desc, const CaseDef data)
177 	: vkt::TestCase	(context, name, desc)
178 	, m_data		(data)
179 {
180 	DE_ASSERT((m_data.width * m_data.height) == (m_data.squaresGroupCount * m_data.geometriesGroupCount * m_data.instancesGroupCount));
181 }
182 
~RayTracingTestCase(void)183 RayTracingTestCase::~RayTracingTestCase	(void)
184 {
185 }
186 
checkSupport(Context & context) const187 void RayTracingTestCase::checkSupport(Context& context) const
188 {
189 	context.requireDeviceFunctionality("VK_KHR_acceleration_structure");
190 	context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
191 
192 	const VkPhysicalDeviceRayTracingPipelineFeaturesKHR&	rayTracingPipelineFeaturesKHR		= context.getRayTracingPipelineFeatures();
193 	if (rayTracingPipelineFeaturesKHR.rayTracingPipeline == DE_FALSE )
194 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceRayTracingPipelineFeaturesKHR.rayTracingPipeline");
195 
196 	const VkPhysicalDeviceAccelerationStructureFeaturesKHR&	accelerationStructureFeaturesKHR	= context.getAccelerationStructureFeatures();
197 	if (accelerationStructureFeaturesKHR.accelerationStructure == DE_FALSE)
198 		TCU_THROW(TestError, "VK_KHR_ray_tracing_pipeline requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructure");
199 
200 	if (accelerationStructureFeaturesKHR.accelerationStructureHostCommands == DE_FALSE)
201 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructureHostCommands");
202 
203 	if (m_data.deferredOperation)
204 		context.requireDeviceFunctionality("VK_KHR_deferred_host_operations");
205 }
206 
initPrograms(SourceCollections & programCollection) const207 void RayTracingTestCase::initPrograms (SourceCollections& programCollection) const
208 {
209 	const vk::ShaderBuildOptions	buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
210 	{
211 		std::stringstream css;
212 		css <<
213 			"#version 460 core\n"
214 			"#extension GL_EXT_ray_tracing : require\n"
215 			"layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
216 			"hitAttributeEXT vec3 attribs;\n"
217 			"layout(r32ui, set = 0, binding = 0) uniform uimage2D result;\n"
218 			"void main()\n"
219 			"{\n"
220 			"  uvec4 color = uvec4(1,0,0,1);\n"
221 			"  imageStore(result, ivec2(gl_LaunchIDEXT.xy), color);\n"
222 			"}\n";
223 
224 		programCollection.glslSources.add("ahit") << glu::AnyHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
225 	}
226 
227 	{
228 		std::stringstream css;
229 		css <<
230 			"#version 460 core\n"
231 			"#extension GL_EXT_ray_tracing : require\n"
232 			"layout(location = 0) rayPayloadInEXT dummyPayload { vec4 dummy; };\n"
233 			"layout(r32ui, set = 0, binding = 0) uniform uimage2D result;\n"
234 			"void main()\n"
235 			"{\n"
236 			"  uvec4 color = uvec4(2,0,0,1);\n"
237 			"  imageStore(result, ivec2(gl_LaunchIDEXT.xy), color);\n"
238 			"}\n";
239 
240 		programCollection.glslSources.add("miss") << glu::MissSource(updateRayTracingGLSL(css.str())) << buildOptions;
241 	}
242 
243 	{
244 		std::stringstream css;
245 		css <<
246 			"#version 460 core\n"
247 			"#extension GL_EXT_ray_tracing : require\n"
248 			"hitAttributeEXT vec3 hitAttribute;\n"
249 			"void main()\n"
250 			"{\n"
251 			"  reportIntersectionEXT(1.0f, 0);\n"
252 			"}\n";
253 
254 		programCollection.glslSources.add("sect") << glu::IntersectionSource(updateRayTracingGLSL(css.str())) << buildOptions;
255 	}
256 
257 	programCollection.glslSources.add("rgen") << glu::RaygenSource(updateRayTracingGLSL(getCommonRayGenerationShader())) << buildOptions;
258 }
259 
createInstance(Context & context) const260 TestInstance* RayTracingTestCase::createInstance (Context& context) const
261 {
262 	return new RayTracingBuildTestInstance(context, m_data);
263 }
264 
initTopAccelerationStructure(VkCommandBuffer cmdBuffer,bool useGpuBuild,deUint32 workerThreadsCount,vector<de::SharedPtr<BottomLevelAccelerationStructure>> & bottomLevelAccelerationStructures)265 de::MovePtr<TopLevelAccelerationStructure> RayTracingBuildTestInstance::initTopAccelerationStructure (VkCommandBuffer											cmdBuffer,
266 																									  bool														useGpuBuild,
267 																									  deUint32													workerThreadsCount,
268 																									  vector<de::SharedPtr<BottomLevelAccelerationStructure> >&	bottomLevelAccelerationStructures)
269 {
270 	const DeviceInterface&						vkd			= m_context.getDeviceInterface();
271 	const VkDevice								device		= m_context.getDevice();
272 	Allocator&									allocator	= m_context.getDefaultAllocator();
273 	de::MovePtr<TopLevelAccelerationStructure>	result		= makeTopLevelAccelerationStructure();
274 
275 	result->setInstanceCount(bottomLevelAccelerationStructures.size());
276 	result->setBuildType(useGpuBuild ? VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR : VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR);
277 	result->setDeferredOperation(m_data.deferredOperation, workerThreadsCount);
278 
279 	for (size_t instanceNdx = 0; instanceNdx < bottomLevelAccelerationStructures.size(); ++instanceNdx)
280 	{
281 		const bool	triangles								= (m_data.testType == TEST_TYPE_TRIANGLES) || (m_data.testType == TEST_TYPE_MIXED && (instanceNdx & 1) == 0);
282 		deUint32	instanceShaderBindingTableRecordOffset	= triangles ? 0 : 1;
283 
284 		result->addInstance(bottomLevelAccelerationStructures[instanceNdx], vk::identityMatrix3x4, 0, 0xFF, instanceShaderBindingTableRecordOffset);
285 	}
286 
287 	result->createAndBuild(vkd, device, cmdBuffer, allocator);
288 
289 	return result;
290 }
291 
initBottomAccelerationStructure(VkCommandBuffer cmdBuffer,bool useGpuBuild,deUint32 workerThreadsCount,tcu::UVec2 & startPos,bool triangles)292 de::MovePtr<BottomLevelAccelerationStructure> RayTracingBuildTestInstance::initBottomAccelerationStructure (VkCommandBuffer	cmdBuffer,
293 																											bool			useGpuBuild,
294 																											deUint32		workerThreadsCount,
295 																											tcu::UVec2&		startPos,
296 																											bool			triangles)
297 {
298 	const DeviceInterface&							vkd			= m_context.getDeviceInterface();
299 	const VkDevice									device		= m_context.getDevice();
300 	Allocator&										allocator	= m_context.getDefaultAllocator();
301 	de::MovePtr<BottomLevelAccelerationStructure>	result		= makeBottomLevelAccelerationStructure();
302 
303 	result->setBuildType(useGpuBuild ? VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR : VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR);
304 	result->setDeferredOperation(m_data.deferredOperation, workerThreadsCount);
305 	result->setGeometryCount(m_data.geometriesGroupCount);
306 
307 	for (size_t geometryNdx = 0; geometryNdx < m_data.geometriesGroupCount; ++geometryNdx)
308 	{
309 		std::vector<tcu::Vec3>	geometryData;
310 
311 		geometryData.reserve(m_data.squaresGroupCount * (triangles ? 3u : 2u));
312 
313 		for (size_t squareNdx = 0; squareNdx < m_data.squaresGroupCount; ++squareNdx)
314 		{
315 			const deUint32	n	= m_data.width * startPos.y() + startPos.x();
316 			const float		x0	= float(startPos.x() + 0) / float(m_data.width);
317 			const float		y0	= float(startPos.y() + 0) / float(m_data.height);
318 			const float		x1	= float(startPos.x() + 1) / float(m_data.width);
319 			const float		y1	= float(startPos.y() + 1) / float(m_data.height);
320 			const float		z	= (n % 7 == 0) ? +1.0f : -1.0f;
321 			const deUint32	m	= (13 * (n + 1)) % (m_data.width * m_data.height);
322 
323 			if (triangles)
324 			{
325 				const float	xm	= (x0 + x1) / 2.0f;
326 				const float	ym	= (y0 + y1) / 2.0f;
327 
328 				geometryData.push_back(tcu::Vec3(x0, y0, z));
329 				geometryData.push_back(tcu::Vec3(xm, y1, z));
330 				geometryData.push_back(tcu::Vec3(x1, ym, z));
331 
332 				if (m_data.squaresGroupCount == 1)
333 				{
334 					geometryData.push_back(tcu::Vec3(x0, y0, z));
335 					geometryData.push_back(tcu::Vec3(x1, ym, z));
336 					geometryData.push_back(tcu::Vec3(xm, y1, z));
337 				}
338 			}
339 			else
340 			{
341 				geometryData.push_back(tcu::Vec3(x0, y0, z));
342 				geometryData.push_back(tcu::Vec3(x1, y1, z));
343 			}
344 
345 			startPos.y() = m / m_data.width;
346 			startPos.x() = m % m_data.width;
347 		}
348 
349 		result->addGeometry(geometryData, triangles);
350 	}
351 
352 	result->createAndBuild(vkd, device, cmdBuffer, allocator);
353 
354 	return result;
355 }
356 
initBottomAccelerationStructures(VkCommandBuffer cmdBuffer,bool useGpuBuild,deUint32 workerThreadsCount)357 vector<de::SharedPtr<BottomLevelAccelerationStructure> > RayTracingBuildTestInstance::initBottomAccelerationStructures (VkCommandBuffer	cmdBuffer,
358 																														bool			useGpuBuild,
359 																														deUint32		workerThreadsCount)
360 {
361 	tcu::UVec2													startPos;
362 	vector<de::SharedPtr<BottomLevelAccelerationStructure> >	result;
363 
364 	for (size_t instanceNdx = 0; instanceNdx < m_data.instancesGroupCount; ++instanceNdx)
365 	{
366 		const bool	triangles	= (m_data.testType == TEST_TYPE_TRIANGLES) || (m_data.testType == TEST_TYPE_MIXED && (instanceNdx & 1) == 0);
367 		de::MovePtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure	= initBottomAccelerationStructure(cmdBuffer, useGpuBuild, workerThreadsCount, startPos, triangles);
368 
369 		result.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(bottomLevelAccelerationStructure.release()));
370 	}
371 
372 	return result;
373 }
374 
runTest(bool useGpuBuild,deUint32 workerThreadsCount)375 de::MovePtr<BufferWithMemory> RayTracingBuildTestInstance::runTest (bool useGpuBuild, deUint32 workerThreadsCount)
376 {
377 	const InstanceInterface&			vki									= m_context.getInstanceInterface();
378 	const DeviceInterface&				vkd									= m_context.getDeviceInterface();
379 	const VkDevice						device								= m_context.getDevice();
380 	const VkPhysicalDevice				physicalDevice						= m_context.getPhysicalDevice();
381 	const deUint32						queueFamilyIndex					= m_context.getUniversalQueueFamilyIndex();
382 	const VkQueue						queue								= m_context.getUniversalQueue();
383 	Allocator&							allocator							= m_context.getDefaultAllocator();
384 	const VkFormat						format								= VK_FORMAT_R32_UINT;
385 	const deUint32						pixelCount							= m_data.width * m_data.height;
386 	const deUint32						shaderGroupHandleSize				= getShaderGroupSize(vki, physicalDevice);
387 	const deUint32						shaderGroupBaseAlignment			= getShaderGroupBaseAlignment(vki, physicalDevice);
388 
389 	const Move<VkDescriptorSetLayout>	descriptorSetLayout					= DescriptorSetLayoutBuilder()
390 																					.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
391 																					.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
392 																					.build(vkd, device);
393 	const Move<VkDescriptorPool>		descriptorPool						= DescriptorPoolBuilder()
394 																					.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
395 																					.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
396 																					.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
397 	const Move<VkDescriptorSet>			descriptorSet						= makeDescriptorSet(vkd, device, *descriptorPool, *descriptorSetLayout);
398 	const Move<VkPipelineLayout>		pipelineLayout						= makePipelineLayout(vkd, device, descriptorSetLayout.get());
399 	const Move<VkCommandPool>			cmdPool								= createCommandPool(vkd, device, 0, queueFamilyIndex);
400 	const Move<VkCommandBuffer>			cmdBuffer							= allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
401 
402 	de::MovePtr<RayTracingPipeline>		rayTracingPipeline					= de::newMovePtr<RayTracingPipeline>();
403 	Move<VkShaderModule>				raygenShader						= createShaderModule(vkd, device, m_context.getBinaryCollection().get("rgen"), 0);
404 	de::SharedPtr<Move<VkShaderModule>>	hitShader							= makeVkSharedPtr(createShaderModule(vkd, device, m_context.getBinaryCollection().get("ahit"), 0));
405 	Move<VkShaderModule>				missShader							= createShaderModule(vkd, device, m_context.getBinaryCollection().get("miss"), 0);
406 	Move<VkShaderModule>				intersectionShader					= createShaderModule(vkd, device, m_context.getBinaryCollection().get("sect"), 0);
407 	rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,		raygenShader,		0u);
408 	rayTracingPipeline->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR,		hitShader,			1u);
409 	rayTracingPipeline->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR,		hitShader,			2u);
410 	rayTracingPipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR, intersectionShader, 2u);
411 	rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR,			missShader,			3u);
412 	Move<VkPipeline> pipeline = rayTracingPipeline->createPipeline(vkd, device, *pipelineLayout);
413 	const de::MovePtr<BufferWithMemory>	raygenShaderBindingTable			= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0u, 1u);
414 	const de::MovePtr<BufferWithMemory>	hitShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1u, 2u);
415 	const de::MovePtr<BufferWithMemory>	missShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 3u, 1u);
416 	const VkStridedDeviceAddressRegionKHR	raygenShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, raygenShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
417 	const VkStridedDeviceAddressRegionKHR	hitShaderBindingTableRegion			= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitShaderBindingTable->get(), 0), shaderGroupHandleSize, 2u * shaderGroupHandleSize);
418 	const VkStridedDeviceAddressRegionKHR	missShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
419 	const VkStridedDeviceAddressRegionKHR	callableShaderBindingTableRegion	= makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
420 
421 	const VkImageCreateInfo				imageCreateInfo						= makeImageCreateInfo(m_data.width, m_data.height, format);
422 	const VkImageSubresourceRange		imageSubresourceRange				= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
423 	const de::MovePtr<ImageWithMemory>	image								= de::MovePtr<ImageWithMemory>(new ImageWithMemory(vkd, device, allocator, imageCreateInfo, MemoryRequirement::Any));
424 	const Move<VkImageView>				imageView							= makeImageView(vkd, device, **image, VK_IMAGE_VIEW_TYPE_2D, format, imageSubresourceRange);
425 
426 	const VkBufferCreateInfo			bufferCreateInfo					= makeBufferCreateInfo(pixelCount*sizeof(deUint32), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
427 	const VkImageSubresourceLayers		bufferImageSubresourceLayers		= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
428 	const VkBufferImageCopy				bufferImageRegion					= makeBufferImageCopy(makeExtent3D(m_data.width, m_data.height, 1u), bufferImageSubresourceLayers);
429 	de::MovePtr<BufferWithMemory>		buffer								= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible));
430 
431 	const VkDescriptorImageInfo			descriptorImageInfo					= makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
432 
433 	const VkImageMemoryBarrier			preImageBarrier						= makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT,
434 																				VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
435 																				**image, imageSubresourceRange);
436 	const VkImageMemoryBarrier			postImageBarrier					= makeImageMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
437 																				VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL,
438 																				**image, imageSubresourceRange);
439 	const VkMemoryBarrier				postTraceMemoryBarrier				= makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
440 	const VkMemoryBarrier				postCopyMemoryBarrier				= makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
441 	const VkClearValue					clearValue							= makeClearValueColorU32(5u, 5u, 5u, 255u);
442 
443 	vector<de::SharedPtr<BottomLevelAccelerationStructure> >	bottomLevelAccelerationStructures;
444 	de::MovePtr<TopLevelAccelerationStructure>					topLevelAccelerationStructure;
445 
446 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
447 	{
448 		cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &preImageBarrier);
449 		vkd.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1, &imageSubresourceRange);
450 		cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, &postImageBarrier);
451 
452 		bottomLevelAccelerationStructures = initBottomAccelerationStructures(*cmdBuffer, useGpuBuild, workerThreadsCount);
453 		topLevelAccelerationStructure = initTopAccelerationStructure(*cmdBuffer, useGpuBuild, workerThreadsCount, bottomLevelAccelerationStructures);
454 
455 		const TopLevelAccelerationStructure*			topLevelAccelerationStructurePtr		= topLevelAccelerationStructure.get();
456 		VkWriteDescriptorSetAccelerationStructureKHR	accelerationStructureWriteDescriptorSet	=
457 		{
458 			VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	//  VkStructureType						sType;
459 			DE_NULL,															//  const void*							pNext;
460 			1u,																	//  deUint32							accelerationStructureCount;
461 			topLevelAccelerationStructurePtr->getPtr(),							//  const VkAccelerationStructureKHR*	pAccelerationStructures;
462 		};
463 
464 		DescriptorSetUpdateBuilder()
465 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
466 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
467 			.update(vkd, device);
468 
469 		vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL);
470 
471 		vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
472 
473 		cmdTraceRays(vkd,
474 			*cmdBuffer,
475 			&raygenShaderBindingTableRegion,
476 			&missShaderBindingTableRegion,
477 			&hitShaderBindingTableRegion,
478 			&callableShaderBindingTableRegion,
479 			m_data.width, m_data.height, 1);
480 
481 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_TRANSFER_BIT, &postTraceMemoryBarrier);
482 
483 		vkd.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **buffer, 1u, &bufferImageRegion);
484 
485 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &postCopyMemoryBarrier);
486 	}
487 	endCommandBuffer(vkd, *cmdBuffer);
488 
489 	submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
490 
491 	invalidateMappedMemoryRange(vkd, device, buffer->getAllocation().getMemory(), buffer->getAllocation().getOffset(), pixelCount * sizeof(deUint32));
492 
493 	return buffer;
494 }
495 
checkSupportInInstance(void) const496 void RayTracingBuildTestInstance::checkSupportInInstance (void) const
497 {
498 	const InstanceInterface&				vki						= m_context.getInstanceInterface();
499 	const VkPhysicalDevice					physicalDevice			= m_context.getPhysicalDevice();
500 	const vk::VkPhysicalDeviceProperties&	properties				= m_context.getDeviceProperties();
501 	const deUint32							requiredAllocations		= 8u
502 																	+ TopLevelAccelerationStructure::getRequiredAllocationCount()
503 																	+ m_data.instancesGroupCount * BottomLevelAccelerationStructure::getRequiredAllocationCount();
504 	de::MovePtr<RayTracingProperties>		rayTracingProperties	= makeRayTracingProperties(vki, physicalDevice);
505 
506 	if (rayTracingProperties->getMaxPrimitiveCount() < m_data.squaresGroupCount)
507 		TCU_THROW(NotSupportedError, "Triangles required more than supported");
508 
509 	if (rayTracingProperties->getMaxGeometryCount() < m_data.geometriesGroupCount)
510 		TCU_THROW(NotSupportedError, "Geometries required more than supported");
511 
512 	if (rayTracingProperties->getMaxInstanceCount() < m_data.instancesGroupCount)
513 		TCU_THROW(NotSupportedError, "Instances required more than supported");
514 
515 	if (properties.limits.maxMemoryAllocationCount < requiredAllocations)
516 		TCU_THROW(NotSupportedError, "Test requires more allocations allowed");
517 }
518 
validateBuffer(de::MovePtr<BufferWithMemory> buffer)519 deUint32 RayTracingBuildTestInstance::validateBuffer (de::MovePtr<BufferWithMemory>	buffer)
520 {
521 	const deUint32*	bufferPtr	= (deUint32*)buffer->getAllocation().getHostPtr();
522 	deUint32		failures	= 0;
523 	deUint32		pos			= 0;
524 
525 	for (deUint32 y = 0; y < m_data.height; ++y)
526 	for (deUint32 x = 0; x < m_data.width; ++x)
527 	{
528 		const deUint32	anyHitValue		= 1;
529 		const deUint32	missValue		= 2;
530 
531 		const deUint32	n				= m_data.width * y + x;
532 		const deUint32	expectedValue	= (n % 7 == 0) ? missValue : anyHitValue;
533 
534 		if (bufferPtr[pos] != expectedValue)
535 		{
536 			if (m_data.testType == TEST_TYPE_AABBS || m_data.testType == TEST_TYPE_MIXED)
537 			{
538 				// In the case of AABB geometries, implementations may increase their size in
539 				// an acceleration structure in order to mitigate precision issues. This may
540 				// result in false positives being reported to the application."
541 
542 				if (bufferPtr[pos] != anyHitValue)
543 				{
544 					failures++;
545 				}
546 			}
547 			else
548 			{
549 				failures++;
550 			}
551 		}
552 
553 		++pos;
554 	}
555 
556 	return failures;
557 }
558 
iterateWithWorkers(void)559 deUint32 RayTracingBuildTestInstance::iterateWithWorkers (void)
560 {
561 	const deUint64					singleThreadTimeStart	= deGetMicroseconds();
562 	de::MovePtr<BufferWithMemory>	singleThreadBufferCPU	= runTest(false, 0);
563 	const deUint32					singleThreadFailures	= validateBuffer(singleThreadBufferCPU);
564 	const deUint64					singleThreadTime		= deGetMicroseconds() - singleThreadTimeStart;
565 
566 	deUint64						multiThreadTimeStart	= deGetMicroseconds();
567 	de::MovePtr<BufferWithMemory>	multiThreadBufferCPU	= runTest(false, m_data.workerThreadsCount);
568 	const deUint32					multiThreadFailures		= validateBuffer(multiThreadBufferCPU);
569 	deUint64						multiThreadTime			= deGetMicroseconds() - multiThreadTimeStart;
570 	const deUint64					multiThreadTimeOut		= 10 * singleThreadTime;
571 
572 	const deUint32					failures				= singleThreadFailures + multiThreadFailures;
573 
574 	DE_ASSERT(multiThreadTimeOut > singleThreadTime);
575 
576 	if (multiThreadTime > multiThreadTimeOut)
577 	{
578 		string failMsg	= "Time of multithreaded test execution " + de::toString(multiThreadTime) +
579 						  " that is longer than expected execution time " + de::toString(multiThreadTimeOut);
580 
581 		TCU_FAIL(failMsg);
582 	}
583 
584 	return failures;
585 }
586 
iterateNoWorkers(void)587 deUint32 RayTracingBuildTestInstance::iterateNoWorkers (void)
588 {
589 	de::MovePtr<BufferWithMemory>	bufferGPU		= runTest(true, 0);
590 	de::MovePtr<BufferWithMemory>	bufferCPU		= runTest(false, 0);
591 	const deUint32					failuresGPU		= validateBuffer(bufferGPU);
592 	const deUint32					failuresCPU		= validateBuffer(bufferCPU);
593 	const deUint32					failures		= failuresGPU + failuresCPU;
594 
595 	return failures;
596 }
597 
iterate(void)598 tcu::TestStatus RayTracingBuildTestInstance::iterate (void)
599 {
600 	checkSupportInInstance();
601 
602 	const deUint32	failures	= m_data.workerThreadsCount == 0
603 								? iterateNoWorkers()
604 								: iterateWithWorkers();
605 
606 	if (failures == 0)
607 		return tcu::TestStatus::pass("Pass");
608 	else
609 		return tcu::TestStatus::fail("failures=" + de::toString(failures));
610 }
611 
612 }	// anonymous
613 
createBuildTests(tcu::TestContext & testCtx)614 tcu::TestCaseGroup*	createBuildTests (tcu::TestContext& testCtx)
615 {
616 	de::MovePtr<tcu::TestCaseGroup> buildGroup(new tcu::TestCaseGroup(testCtx, "build", "Ray tracing build tests"));
617 
618 	const char*		tests[]	=
619 	{
620 		"level_primitives",
621 		"level_geometries",
622 		"level_instances"
623 	};
624 	const deUint32	sizes[]		= { 4, 16, 64, 256, 1024 };
625 	const deUint32	factors[]	= { 1, 4 };
626 	const deUint32	threads[]	= { 0, 1, 2, 3, 4, 8, std::numeric_limits<deUint32>::max() };
627 
628 	for (size_t threadNdx = 0; threadNdx <= DE_LENGTH_OF_ARRAY(threads); ++threadNdx)
629 	{
630 		const bool						defferedOperation	= threadNdx != DE_LENGTH_OF_ARRAY(threads);
631 		const deUint32					threadsCount		= threadNdx < DE_LENGTH_OF_ARRAY(threads) ? threads[threadNdx] : 0;
632 		const string					groupName			= !defferedOperation ? "gpu_cpu"
633 															: threadsCount == 0 ? "gpu_cpuht"
634 															: threadsCount == std::numeric_limits<deUint32>::max() ? "cpuht_max"
635 															: "cpuht_" + de::toString(threadsCount);
636 		const string					groupDesc			= !defferedOperation ? "Compare results of run with acceleration structures build on GPU and CPU"
637 															: threadsCount > 0 ? "Compare results of run with acceleration structures build on GPU and using host threading"
638 															: "Run acceleration structures build using host threading";
639 
640 		const bool						deviceBuild			= !defferedOperation || threadsCount == 0;
641 
642 		de::MovePtr<tcu::TestCaseGroup>	groupGpuCpuHt		(new tcu::TestCaseGroup(testCtx, groupName.c_str(), groupDesc.c_str()));
643 
644 		for (size_t testsNdx = 0; testsNdx < DE_LENGTH_OF_ARRAY(tests); ++testsNdx)
645 		{
646 			de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, tests[testsNdx], ""));
647 
648 			for (size_t factorNdx = 0; factorNdx < DE_LENGTH_OF_ARRAY(factors); ++factorNdx)
649 			for (size_t sizesNdx = 0; sizesNdx < DE_LENGTH_OF_ARRAY(sizes); ++sizesNdx)
650 			{
651 				if (deviceBuild && sizes[sizesNdx] > 256)
652 					continue;
653 
654 				const deUint32	factor					= factors[factorNdx];
655 				const deUint32	largestGroup			= sizes[sizesNdx] * sizes[sizesNdx] / factor / factor;
656 				const deUint32	squaresGroupCount		= testsNdx == 0 ? largestGroup : factor;
657 				const deUint32	geometriesGroupCount	= testsNdx == 1 ? largestGroup : factor;
658 				const deUint32	instancesGroupCount		= testsNdx == 2 ? largestGroup : factor;
659 				const CaseDef	caseDef					=
660 				{
661 					TEST_TYPE_TRIANGLES,	//  TestType	testType;
662 					sizes[sizesNdx],		//  deUint32	width;
663 					sizes[sizesNdx],		//  deUint32	height;
664 					squaresGroupCount,		//  deUint32	squaresGroupCount;
665 					geometriesGroupCount,	//  deUint32	geometriesGroupCount;
666 					instancesGroupCount,	//  deUint32	instancesGroupCount;
667 					defferedOperation,		//  bool		deferredOperation;
668 					threadsCount			//  deUint32	workerThreadsCount;
669 				};
670 				const std::string	suffix		= de::toString(caseDef.instancesGroupCount) + '_' + de::toString(caseDef.geometriesGroupCount) + '_' + de::toString(caseDef.squaresGroupCount);
671 				const std::string	testName	= "triangles_" + suffix;
672 
673 				if (squaresGroupCount == 0 || geometriesGroupCount == 0 || instancesGroupCount == 0)
674 					continue;
675 
676 				group->addChild(new RayTracingTestCase(testCtx, testName.c_str(), "", caseDef));
677 			}
678 
679 			for (size_t factorNdx = 0; factorNdx < DE_LENGTH_OF_ARRAY(factors); ++factorNdx)
680 			for (size_t sizesNdx = 0; sizesNdx < DE_LENGTH_OF_ARRAY(sizes); ++sizesNdx)
681 			{
682 				if (deviceBuild && sizes[sizesNdx] > 256)
683 					continue;
684 
685 				const deUint32	factor					= factors[factorNdx];
686 				const deUint32	largestGroup			= sizes[sizesNdx] * sizes[sizesNdx] / factor / factor;
687 				const deUint32	squaresGroupCount		= testsNdx == 0 ? largestGroup : factor;
688 				const deUint32	geometriesGroupCount	= testsNdx == 1 ? largestGroup : factor;
689 				const deUint32	instancesGroupCount		= testsNdx == 2 ? largestGroup : factor;
690 				const CaseDef	caseDef					=
691 				{
692 					TEST_TYPE_AABBS,		//  TestType	testType;
693 					sizes[sizesNdx],		//  deUint32	width;
694 					sizes[sizesNdx],		//  deUint32	height;
695 					squaresGroupCount,		//  deUint32	squaresGroupCount;
696 					geometriesGroupCount,	//  deUint32	geometriesGroupCount;
697 					instancesGroupCount,	//  deUint32	instancesGroupCount;
698 					defferedOperation,		//  bool		deferredOperation;
699 					threadsCount			//  deUint32	workerThreadsCount;
700 				};
701 				const std::string	suffix		= de::toString(caseDef.instancesGroupCount) + '_' + de::toString(caseDef.geometriesGroupCount) + '_' + de::toString(caseDef.squaresGroupCount);
702 				const std::string	testName	= "aabbs_" + suffix;
703 
704 				if (squaresGroupCount == 0 || geometriesGroupCount == 0 || instancesGroupCount == 0)
705 					continue;
706 
707 				group->addChild(new RayTracingTestCase(testCtx, testName.c_str(), "", caseDef));
708 			}
709 
710 			for (size_t factorNdx = 0; factorNdx < DE_LENGTH_OF_ARRAY(factors); ++factorNdx)
711 			for (size_t sizesNdx = 0; sizesNdx < DE_LENGTH_OF_ARRAY(sizes); ++sizesNdx)
712 			{
713 				if (deviceBuild && sizes[sizesNdx] > 256)
714 					continue;
715 
716 				const deUint32	factor					= factors[factorNdx];
717 				const deUint32	largestGroup			= sizes[sizesNdx] * sizes[sizesNdx] / factor / factor;
718 				const deUint32	squaresGroupCount		= testsNdx == 0 ? largestGroup : factor;
719 				const deUint32	geometriesGroupCount	= testsNdx == 1 ? largestGroup : factor;
720 				const deUint32	instancesGroupCount		= testsNdx == 2 ? largestGroup : factor;
721 				const CaseDef	caseDef					=
722 				{
723 					TEST_TYPE_MIXED,		//  TestType	testType;
724 					sizes[sizesNdx],		//  deUint32	width;
725 					sizes[sizesNdx],		//  deUint32	height;
726 					squaresGroupCount,		//  deUint32	squaresGroupCount;
727 					geometriesGroupCount,	//  deUint32	geometriesGroupCount;
728 					instancesGroupCount,	//  deUint32	instancesGroupCount;
729 					defferedOperation,		//  bool		deferredOperation;
730 					threadsCount			//  deUint32	workerThreadsCount;
731 				};
732 				const std::string	suffix		= de::toString(caseDef.instancesGroupCount) + '_' + de::toString(caseDef.geometriesGroupCount) + '_' + de::toString(caseDef.squaresGroupCount);
733 				const std::string	testName	= "mixed_" + suffix;
734 
735 				if (squaresGroupCount < 2 || geometriesGroupCount < 2 || instancesGroupCount < 2)
736 					continue;
737 
738 				group->addChild(new RayTracingTestCase(testCtx, testName.c_str(), "", caseDef));
739 			}
740 
741 			groupGpuCpuHt->addChild(group.release());
742 		}
743 
744 		buildGroup->addChild(groupGpuCpuHt.release());
745 	}
746 
747 	return buildGroup.release();
748 }
749 
750 }	// RayTracing
751 }	// vkt
752