• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2020 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *	  http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Ray Tracing Pipeline Library Tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktRayTracingPipelineLibraryTests.hpp"
25 
26 #include <list>
27 #include <vector>
28 
29 #include "vkDefs.hpp"
30 
31 #include "vktTestCase.hpp"
32 #include "vktTestGroupUtil.hpp"
33 #include "vktCustomInstancesDevices.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkObjUtil.hpp"
36 #include "vkBuilderUtil.hpp"
37 #include "vkBarrierUtil.hpp"
38 #include "vkBufferWithMemory.hpp"
39 #include "vkImageWithMemory.hpp"
40 #include "vkTypeUtil.hpp"
41 
42 #include "vkRayTracingUtil.hpp"
43 
44 #include "tcuCommandLine.hpp"
45 
46 namespace vkt
47 {
48 namespace RayTracing
49 {
50 namespace
51 {
52 using namespace vk;
53 using namespace vkt;
54 
55 static const VkFlags	ALL_RAY_TRACING_STAGES		= VK_SHADER_STAGE_RAYGEN_BIT_KHR
56 													| VK_SHADER_STAGE_ANY_HIT_BIT_KHR
57 													| VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR
58 													| VK_SHADER_STAGE_MISS_BIT_KHR
59 													| VK_SHADER_STAGE_INTERSECTION_BIT_KHR
60 													| VK_SHADER_STAGE_CALLABLE_BIT_KHR;
61 
62 static const deUint32	RTPL_DEFAULT_SIZE			= 8u;
63 static const deUint32	RTPL_MAX_CHIT_SHADER_COUNT	= 16;
64 
65 struct LibraryConfiguration
66 {
67 	deInt32								pipelineShaders;
68 	std::vector<tcu::IVec2>				pipelineLibraries; // IVec2 = ( parentID, shaderCount )
69 };
70 
// Selects which additional verification is performed on top of the rendering check.
enum class TestType
{
	DEFAULT							= 0,	// No shader group handle verification.
	CHECK_GROUP_HANDLES				= 1,	// Verify the normal shader group handles.
	CHECK_CAPTURE_REPLAY_HANDLES	= 2,	// Verify the capture/replay shader group handles.
	CHECK_ALL_HANDLES				= 3,	// Verify normal handles first, then capture/replay handles.
};
78 
79 struct TestParams
80 {
81 	LibraryConfiguration				libraryConfiguration;
82 	bool								multithreadedCompilation;
83 	bool								pipelinesCreatedUsingDHO;
84 	TestType							testType;
85 	bool								useAABBs;
86 	bool								useMaintenance5;
87 	bool								useLinkTimeOptimizations;
88 	bool								retainLinkTimeOptimizations;
89 	deUint32							width;
90 	deUint32							height;
91 
getPixelCountvkt::RayTracing::__anon3493a2290111::TestParams92 	uint32_t getPixelCount (void) const
93 	{
94 		return width * height;
95 	}
96 
getHitGroupCountvkt::RayTracing::__anon3493a2290111::TestParams97 	uint32_t getHitGroupCount (void) const
98 	{
99 		uint32_t numShadersUsed = libraryConfiguration.pipelineShaders;
100 		for (const auto& lib : libraryConfiguration.pipelineLibraries)
101 			numShadersUsed += lib.y();
102 		return numShadersUsed;
103 	}
104 
includesCaptureReplayvkt::RayTracing::__anon3493a2290111::TestParams105 	bool includesCaptureReplay (void) const
106 	{
107 		return (testType == TestType::CHECK_CAPTURE_REPLAY_HANDLES || testType == TestType::CHECK_ALL_HANDLES);
108 	}
109 };
110 
111 // This class will help verify shader group handles in libraries by maintaining information of the library tree and being able to
112 // calculate the offset of the handles for each pipeline in the "flattened" array of shader group handles.
113 class PipelineTree
114 {
115 protected:
116 	// Each node represents a pipeline.
117 	class Node
118 	{
119 	public:
Node(int64_t parent,uint32_t groupCount)120 		Node (int64_t parent, uint32_t groupCount)
121 			: m_parent		(parent)
122 			, m_groupCount	(groupCount)
123 			, m_children	()
124 			, m_frozen		(false)
125 			, m_flatOffset	(std::numeric_limits<uint32_t>::max())
126 		{}
127 
appendChild(Node * child)128 		void		appendChild				(Node* child)					{ m_children.push_back(child); }
getOffset(void) const129 		uint32_t	getOffset				(void) const					{ return m_flatOffset; }
freeze(void)130 		void		freeze					(void)							{ m_frozen = true; }
calcOffsetRecursively(uint32_t currentOffset)131 		uint32_t	calcOffsetRecursively	(uint32_t currentOffset)		// Returns the next offset.
132 		{
133 			DE_ASSERT(m_frozen);
134 			m_flatOffset = currentOffset;
135 			uint32_t newOffset = currentOffset + m_groupCount;
136 			for (auto& node : m_children)
137 				newOffset = node->calcOffsetRecursively(newOffset);
138 			return newOffset;
139 		}
140 
141 	protected:
142 		const int64_t		m_parent;		// Parent pipeline (-1 for the root node).
143 		const uint32_t		m_groupCount;	// Shader group count in pipeline. Related to LibraryConfiguration::pipelineLibraries[1].
144 		std::vector<Node*>	m_children;		// How many child pipelines. Related to LibraryConfiguration::pipelineLibraries[0]
145 		bool				m_frozen;		// No sense to calculate offsets before the tree structure is fully constructed.
146 		uint32_t			m_flatOffset;	// Calculated offset in the flattened array.
147 	};
148 
149 public:
PipelineTree()150 	PipelineTree ()
151 		: m_nodes				()
152 		, m_root				(nullptr)
153 		, m_frozen				(false)
154 		, m_offsetsCalculated	(false)
155 	{}
156 
157 	// See LibraryConfiguration::pipelineLibraries.
addNode(int64_t parent,uint32_t groupCount)158 	void addNode (int64_t parent, uint32_t groupCount)
159 	{
160 		DE_ASSERT(m_nodes.size() < static_cast<size_t>(std::numeric_limits<uint32_t>::max()));
161 
162 		if (parent < 0)
163 		{
164 			DE_ASSERT(!m_root);
165 			m_nodes.emplace_back(new Node(parent, groupCount));
166 			m_root = m_nodes.back().get();
167 		}
168 		else
169 		{
170 			DE_ASSERT(parent < static_cast<int64_t>(m_nodes.size()));
171 			m_nodes.emplace_back(new Node(parent, groupCount));
172 			m_nodes.at(static_cast<size_t>(parent))->appendChild(m_nodes.back().get());
173 		}
174 	}
175 
176 	// Confirms we will not be adding more nodes to the tree.
freeze(void)177 	void freeze (void)
178 	{
179 		for (auto& node : m_nodes)
180 			node->freeze();
181 		m_frozen = true;
182 	}
183 
184 	// When obtaining shader group handles from the root pipeline, we get a vector of handles in which some of those handles come from pipeline libraries.
185 	// This method returns, for each pipeline, the offset of its shader group handles in that vector as the number of shader groups (not bytes).
getGroupOffsets(void)186 	std::vector<uint32_t> getGroupOffsets (void)
187 	{
188 		DE_ASSERT(m_frozen);
189 
190 		if (!m_offsetsCalculated)
191 		{
192 			calcOffsets();
193 			m_offsetsCalculated = true;
194 		}
195 
196 		std::vector<uint32_t> offsets;
197 		offsets.reserve(m_nodes.size());
198 
199 		for (const auto& node : m_nodes)
200 			offsets.push_back(node->getOffset());
201 
202 		return offsets;
203 	}
204 
205 protected:
calcOffsets(void)206 	void calcOffsets (void)
207 	{
208 		DE_ASSERT(m_frozen);
209 		if (m_root)
210 		{
211 			m_root->calcOffsetRecursively(0);
212 		}
213 	}
214 
215 	std::vector<std::unique_ptr<Node>>	m_nodes;
216 	Node*								m_root;
217 	bool								m_frozen;
218 	bool								m_offsetsCalculated;
219 };
220 
makeImageCreateInfo(deUint32 width,deUint32 height,VkFormat format)221 VkImageCreateInfo makeImageCreateInfo (deUint32 width, deUint32 height, VkFormat format)
222 {
223 	const VkImageCreateInfo			imageCreateInfo			=
224 	{
225 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,																// VkStructureType			sType;
226 		DE_NULL,																							// const void*				pNext;
227 		(VkImageCreateFlags)0u,																				// VkImageCreateFlags		flags;
228 		VK_IMAGE_TYPE_2D,																					// VkImageType				imageType;
229 		format,																								// VkFormat					format;
230 		makeExtent3D(width, height, 1),																		// VkExtent3D				extent;
231 		1u,																									// deUint32					mipLevels;
232 		1u,																									// deUint32					arrayLayers;
233 		VK_SAMPLE_COUNT_1_BIT,																				// VkSampleCountFlagBits	samples;
234 		VK_IMAGE_TILING_OPTIMAL,																			// VkImageTiling			tiling;
235 		VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT,		// VkImageUsageFlags		usage;
236 		VK_SHARING_MODE_EXCLUSIVE,																			// VkSharingMode			sharingMode;
237 		0u,																									// deUint32					queueFamilyIndexCount;
238 		DE_NULL,																							// const deUint32*			pQueueFamilyIndices;
239 		VK_IMAGE_LAYOUT_UNDEFINED																			// VkImageLayout			initialLayout;
240 	};
241 
242 	return imageCreateInfo;
243 }
244 
245 class RayTracingPipelineLibraryTestCase : public TestCase
246 {
247 	public:
248 							RayTracingPipelineLibraryTestCase	(tcu::TestContext& context, const char* name, const TestParams data);
249 							~RayTracingPipelineLibraryTestCase	(void);
250 
251 	virtual void			checkSupport								(Context& context) const;
252 	virtual	void			initPrograms								(SourceCollections& programCollection) const;
253 	virtual TestInstance*	createInstance								(Context& context) const;
254 private:
255 	TestParams				m_data;
256 };
257 
258 class RayTracingPipelineLibraryTestInstance : public TestInstance
259 {
260 public:
261 																	RayTracingPipelineLibraryTestInstance	(Context& context, const TestParams& data);
262 																	~RayTracingPipelineLibraryTestInstance	(void);
263 	tcu::TestStatus													iterate									(void);
264 
265 protected:
266 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	initBottomAccelerationStructures		(VkCommandBuffer cmdBuffer);
267 	de::MovePtr<TopLevelAccelerationStructure>						initTopAccelerationStructure			(VkCommandBuffer cmdBuffer,
268 																											 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >& bottomLevelAccelerationStructures);
269 	std::vector<uint32_t>											runTest									(bool replay = false);
270 private:
271 	TestParams														m_data;
272 	PipelineTree													m_pipelineTree;
273 	std::vector<uint8_t>											m_captureReplayHandles;
274 };
275 
276 
RayTracingPipelineLibraryTestCase(tcu::TestContext & context,const char * name,const TestParams data)277 RayTracingPipelineLibraryTestCase::RayTracingPipelineLibraryTestCase (tcu::TestContext& context, const char* name, const TestParams data)
278 	: vkt::TestCase	(context, name)
279 	, m_data		(data)
280 {
281 }
282 
~RayTracingPipelineLibraryTestCase(void)283 RayTracingPipelineLibraryTestCase::~RayTracingPipelineLibraryTestCase	(void)
284 {
285 }
286 
checkSupport(Context & context) const287 void RayTracingPipelineLibraryTestCase::checkSupport(Context& context) const
288 {
289 	context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
290 	context.requireDeviceFunctionality("VK_KHR_pipeline_library");
291 
292 	if (m_data.testType != TestType::DEFAULT)
293 		context.requireDeviceFunctionality("VK_EXT_pipeline_library_group_handles");
294 
295 	if (m_data.useLinkTimeOptimizations)
296 		context.requireDeviceFunctionality("VK_EXT_graphics_pipeline_library");
297 
298 	if (m_data.useMaintenance5)
299 		context.requireDeviceFunctionality("VK_KHR_maintenance5");
300 
301 	if (m_data.includesCaptureReplay())
302 	{
303 		const auto& rtFeatures = context.getRayTracingPipelineFeatures();
304 		if (!rtFeatures.rayTracingPipelineShaderGroupHandleCaptureReplay)
305 			TCU_THROW(NotSupportedError, "rayTracingPipelineShaderGroupHandleCaptureReplay not supported");
306 	}
307 
308 }
309 
initPrograms(SourceCollections & programCollection) const310 void RayTracingPipelineLibraryTestCase::initPrograms (SourceCollections& programCollection) const
311 {
312 	const vk::ShaderBuildOptions	buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
313 
314 	{
315 		std::stringstream css;
316 		css <<
317 			"#version 460 core\n"
318 			"#extension GL_EXT_ray_tracing : require\n"
319 			"layout(location = 0) rayPayloadEXT uvec4 hitValue;\n"
320 			"layout(r32ui, set = 0, binding = 0) uniform uimage2D result;\n"
321 			"layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n"
322 			"\n"
323 			"void main()\n"
324 			"{\n"
325 			"  float tmin     = 0.0;\n"
326 			"  float tmax     = 1.0;\n"
327 			"  vec3  origin   = vec3(float(gl_LaunchIDEXT.x) + 0.5f, float(gl_LaunchIDEXT.y) + 0.5f, float(gl_LaunchIDEXT.z + 0.5f));\n"
328 			"  vec3  direct   = vec3(0.0, 0.0, -1.0);\n"
329 			"  hitValue       = uvec4(" << RTPL_MAX_CHIT_SHADER_COUNT+1 << ",0,0,0);\n"
330 			"  traceRayEXT(topLevelAS, 0, 0xFF, 0, 0, 0, origin, tmin, direct, tmax, 0);\n"
331 			"  imageStore(result, ivec2(gl_LaunchIDEXT.xy), hitValue);\n"
332 			"}\n";
333 		programCollection.glslSources.add("rgen") << glu::RaygenSource(updateRayTracingGLSL(css.str())) << buildOptions;
334 	}
335 
336 	{
337 		std::stringstream css;
338 		css <<
339 			"#version 460 core\n"
340 			"#extension GL_EXT_ray_tracing : require\n"
341 			"layout(location = 0) rayPayloadInEXT uvec4 hitValue;\n"
342 			"void main()\n"
343 			"{\n"
344 			"  hitValue = uvec4("<< RTPL_MAX_CHIT_SHADER_COUNT <<",0,0,1);\n"
345 			"}\n";
346 
347 		programCollection.glslSources.add("miss") << glu::MissSource(updateRayTracingGLSL(css.str())) << buildOptions;
348 	}
349 
350 	if (m_data.useAABBs)
351 	{
352 		std::ostringstream isec;
353 		isec
354 			<< "#version 460 core\n"
355 			<< "#extension GL_EXT_ray_tracing : require\n"
356 			<< "void main()\n"
357 			<< "{\n"
358 			<< "  reportIntersectionEXT(gl_RayTminEXT, 0);\n"
359 			<< "}\n"
360 			;
361 		programCollection.glslSources.add("isec") << glu::IntersectionSource(updateRayTracingGLSL(isec.str())) << buildOptions;
362 	}
363 
364 	for(deUint32 i=0; i<RTPL_MAX_CHIT_SHADER_COUNT; ++i)
365 	{
366 		std::stringstream css;
367 		css <<
368 			"#version 460 core\n"
369 			"#extension GL_EXT_ray_tracing : require\n"
370 			"layout(location = 0) rayPayloadInEXT uvec4 hitValue;\n"
371 			"void main()\n"
372 			"{\n"
373 			"  hitValue = uvec4(" << i << ",0,0,1);\n"
374 			"}\n";
375 		std::stringstream csname;
376 		csname << "chit" << i;
377 		programCollection.glslSources.add(csname.str()) << glu::ClosestHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
378 	}
379 }
380 
createInstance(Context & context) const381 TestInstance* RayTracingPipelineLibraryTestCase::createInstance (Context& context) const
382 {
383 	return new RayTracingPipelineLibraryTestInstance(context, m_data);
384 }
385 
RayTracingPipelineLibraryTestInstance(Context & context,const TestParams & data)386 RayTracingPipelineLibraryTestInstance::RayTracingPipelineLibraryTestInstance (Context& context, const TestParams& data)
387 	: vkt::TestInstance		(context)
388 	, m_data				(data)
389 	, m_pipelineTree		()
390 {
391 	// Build the helper pipeline tree, which helps for some tests.
392 	m_pipelineTree.addNode(-1, static_cast<uint32_t>(m_data.libraryConfiguration.pipelineShaders + 2/*rgen and miss for the root pipeline*/));
393 
394 	for (const auto& lib : m_data.libraryConfiguration.pipelineLibraries)
395 		m_pipelineTree.addNode(lib.x(), static_cast<uint32_t>(lib.y()));
396 
397 	m_pipelineTree.freeze();
398 }
399 
~RayTracingPipelineLibraryTestInstance(void)400 RayTracingPipelineLibraryTestInstance::~RayTracingPipelineLibraryTestInstance (void)
401 {
402 }
403 
initBottomAccelerationStructures(VkCommandBuffer cmdBuffer)404 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> > RayTracingPipelineLibraryTestInstance::initBottomAccelerationStructures (VkCommandBuffer cmdBuffer)
405 {
406 	const auto&														vkd			= m_context.getDeviceInterface();
407 	const auto														device		= m_context.getDevice();
408 	auto&															allocator	= m_context.getDefaultAllocator();
409 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >	result;
410 
411 	tcu::Vec3 v0(0.0, 1.0, 0.0);
412 	tcu::Vec3 v1(0.0, 0.0, 0.0);
413 	tcu::Vec3 v2(1.0, 1.0, 0.0);
414 	tcu::Vec3 v3(1.0, 0.0, 0.0);
415 
416 	for (deUint32 y = 0; y < m_data.height; ++y)
417 		for (deUint32 x = 0; x < m_data.width; ++x)
418 		{
419 			// let's build a 3D chessboard of geometries
420 			if (((x + y) % 2) == 0)
421 				continue;
422 			tcu::Vec3 xyz((float)x, (float)y, 0.0f);
423 			std::vector<tcu::Vec3>	geometryData;
424 
425 			de::MovePtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure = makeBottomLevelAccelerationStructure();
426 			bottomLevelAccelerationStructure->setGeometryCount(1u);
427 
428 			if (m_data.useAABBs)
429 			{
430 				geometryData.push_back(xyz + v1);
431 				geometryData.push_back(xyz + v2);
432 			}
433 			else
434 			{
435 				geometryData.push_back(xyz + v0);
436 				geometryData.push_back(xyz + v1);
437 				geometryData.push_back(xyz + v2);
438 				geometryData.push_back(xyz + v2);
439 				geometryData.push_back(xyz + v1);
440 				geometryData.push_back(xyz + v3);
441 			}
442 
443 			bottomLevelAccelerationStructure->addGeometry(geometryData, !m_data.useAABBs/*triangles*/);
444 			bottomLevelAccelerationStructure->createAndBuild(vkd, device, cmdBuffer, allocator);
445 			result.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(bottomLevelAccelerationStructure.release()));
446 		}
447 
448 	return result;
449 }
450 
initTopAccelerationStructure(VkCommandBuffer cmdBuffer,std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> & bottomLevelAccelerationStructures)451 de::MovePtr<TopLevelAccelerationStructure> RayTracingPipelineLibraryTestInstance::initTopAccelerationStructure (VkCommandBuffer cmdBuffer,
452 																												std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >& bottomLevelAccelerationStructures)
453 {
454 	const auto&									vkd			= m_context.getDeviceInterface();
455 	const auto									device		= m_context.getDevice();
456 	auto&										allocator	= m_context.getDefaultAllocator();
457 
458 	deUint32 instanceCount = m_data.width * m_data.height / 2;
459 
460 	de::MovePtr<TopLevelAccelerationStructure>	result = makeTopLevelAccelerationStructure();
461 	result->setInstanceCount(instanceCount);
462 
463 	deUint32 currentInstanceIndex	= 0;
464 	deUint32 numShadersUsed			= m_data.getHitGroupCount();
465 
466 	for (deUint32 y = 0; y < m_data.height; ++y)
467 		for (deUint32 x = 0; x < m_data.width; ++x)
468 		{
469 			if (((x + y) % 2) == 0)
470 				continue;
471 
472 			result->addInstance(bottomLevelAccelerationStructures[currentInstanceIndex], identityMatrix3x4, 0, 0xFF, currentInstanceIndex % numShadersUsed, 0U);
473 			currentInstanceIndex++;
474 		}
475 	result->createAndBuild(vkd, device, cmdBuffer, allocator);
476 
477 	return result;
478 }
479 
compileShaders(Context & context,de::SharedPtr<de::MovePtr<RayTracingPipeline>> & pipeline,const std::vector<std::tuple<std::string,VkShaderStageFlagBits>> & shaderData,const Move<VkShaderModule> & isecMod)480 void compileShaders (Context& context,
481 					 de::SharedPtr<de::MovePtr<RayTracingPipeline>>& pipeline,
482 					 const std::vector<std::tuple<std::string, VkShaderStageFlagBits>>& shaderData,
483 					 const Move<VkShaderModule>& isecMod)
484 {
485 	const auto&	vkd			= context.getDeviceInterface();
486 	const auto	device		= context.getDevice();
487 	const auto&	binaries	= context.getBinaryCollection();
488 	const bool	hasISec		= static_cast<bool>(isecMod);
489 
490 	for (deUint32 i=0; i< shaderData.size(); ++i)
491 	{
492 		std::string				shaderName;
493 		VkShaderStageFlagBits	shaderStage;
494 		std::tie(shaderName, shaderStage) = shaderData[i];
495 
496 		auto pipelinePtr = pipeline->get();
497 		pipelinePtr->addShader(shaderStage, createShaderModule(vkd, device, binaries.get(shaderName)), i);
498 		if (hasISec && shaderStage == VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR)
499 			pipelinePtr->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR, isecMod.get(), i);
500 	}
501 }
502 
503 struct CompileShadersMultithreadData
504 {
505 	Context&															context;
506 	de::SharedPtr<de::MovePtr<RayTracingPipeline>>&						pipeline;
507 	const std::vector<std::tuple<std::string, VkShaderStageFlagBits>>&	shaderData;
508 	const Move<VkShaderModule>&											isecMod;
509 };
510 
compileShadersThread(void * param)511 void compileShadersThread (void* param)
512 {
513 	CompileShadersMultithreadData* csmd = (CompileShadersMultithreadData*)param;
514 	compileShaders(csmd->context, csmd->pipeline, csmd->shaderData, csmd->isecMod);
515 }
516 
getAllGroupCounts(const std::vector<de::SharedPtr<de::MovePtr<RayTracingPipeline>>> & rayTracingPipelines)517 std::vector<uint32_t> getAllGroupCounts (const std::vector<de::SharedPtr<de::MovePtr<RayTracingPipeline>>>& rayTracingPipelines)
518 {
519 	std::vector<uint32_t> allGroupCounts;
520 	allGroupCounts.reserve(rayTracingPipelines.size());
521 	std::transform(begin(rayTracingPipelines), end(rayTracingPipelines), std::back_inserter(allGroupCounts),
522 		[](const de::SharedPtr<de::MovePtr<RayTracingPipeline>>& rtPipeline) { return rtPipeline->get()->getFullShaderGroupCount(); });
523 
524 	return allGroupCounts;
525 }
526 
527 // Sometimes we want to obtain shader group handles and do checks on them, and the processing we do is the same for normal handles
528 // and for capture/replay handles. Yet their sizes can be different, and the function to get them also changes. The type below
529 // provides a small abstraction so we only have to choose the right class to instantiate, and the rest of the code is the same.
530 class HandleGetter
531 {
532 public:
HandleGetter(const uint32_t handleSize)533 	HandleGetter			(const uint32_t handleSize) : m_handleSize(handleSize)	{}
~HandleGetter()534 	virtual ~HandleGetter	()														{}
535 
536 	virtual std::vector<uint8_t> getShaderGroupHandlesVector (const RayTracingPipeline*	rtPipeline,
537 															  const DeviceInterface&	vkd,
538 															  const VkDevice			device,
539 															  const VkPipeline			pipeline,
540 															  const uint32_t			firstGroup,
541 															  const uint32_t			groupCount) const = 0;
542 
543 protected:
544 	const uint32_t m_handleSize;
545 };
546 
547 class NormalHandleGetter : public HandleGetter
548 {
549 public:
NormalHandleGetter(const uint32_t shaderGroupHandleSize)550 	NormalHandleGetter			(const uint32_t shaderGroupHandleSize) : HandleGetter(shaderGroupHandleSize)	{}
~NormalHandleGetter()551 	virtual ~NormalHandleGetter	()																				{}
552 
getShaderGroupHandlesVector(const RayTracingPipeline * rtPipeline,const DeviceInterface & vkd,const VkDevice device,const VkPipeline pipeline,const uint32_t firstGroup,const uint32_t groupCount) const553 	std::vector<uint8_t> getShaderGroupHandlesVector (const RayTracingPipeline*	rtPipeline,
554 													  const DeviceInterface&	vkd,
555 													  const VkDevice			device,
556 													  const VkPipeline			pipeline,
557 													  const uint32_t			firstGroup,
558 													  const uint32_t			groupCount) const override
559 	{
560 		return rtPipeline->getShaderGroupHandles(vkd, device, pipeline, m_handleSize, firstGroup, groupCount);
561 	}
562 };
563 
564 class CaptureReplayHandleGetter : public HandleGetter
565 {
566 public:
CaptureReplayHandleGetter(const uint32_t shaderGroupHandleCaptureReplaySize)567 	CaptureReplayHandleGetter			(const uint32_t shaderGroupHandleCaptureReplaySize) : HandleGetter(shaderGroupHandleCaptureReplaySize)	{}
~CaptureReplayHandleGetter()568 	virtual ~CaptureReplayHandleGetter	()																				{}
569 
getShaderGroupHandlesVector(const RayTracingPipeline * rtPipeline,const DeviceInterface & vkd,const VkDevice device,const VkPipeline pipeline,const uint32_t firstGroup,const uint32_t groupCount) const570 	std::vector<uint8_t> getShaderGroupHandlesVector (const RayTracingPipeline*	rtPipeline,
571 													  const DeviceInterface&	vkd,
572 													  const VkDevice			device,
573 													  const VkPipeline			pipeline,
574 													  const uint32_t			firstGroup,
575 													  const uint32_t			groupCount) const override
576 	{
577 		return rtPipeline->getShaderGroupReplayHandles(vkd, device, pipeline, m_handleSize, firstGroup, groupCount);
578 	}
579 };
580 
runTest(bool replay)581 std::vector<uint32_t> RayTracingPipelineLibraryTestInstance::runTest (bool replay)
582 {
583 	const InstanceInterface&			vki									= m_context.getInstanceInterface();
584 	const VkPhysicalDevice				physicalDevice						= m_context.getPhysicalDevice();
585 	const auto&							vkd									= m_context.getDeviceInterface();
586 	const auto							device								= m_context.getDevice();
587 	const auto							queueFamilyIndex					= m_context.getUniversalQueueFamilyIndex();
588 	const auto							queue								= m_context.getUniversalQueue();
589 	auto&								allocator							= m_context.getDefaultAllocator();
590 	const auto							pixelCount							= m_data.getPixelCount();
591 	const auto							hitGroupCount						= m_data.getHitGroupCount();
592 	const auto							rayTracingProperties				= makeRayTracingProperties(vki, physicalDevice);
593 	const uint32_t						shaderGroupHandleSize				= rayTracingProperties->getShaderGroupHandleSize();
594 	const uint32_t						shaderGroupBaseAlignment			= rayTracingProperties->getShaderGroupBaseAlignment();
595 	const uint32_t						shaderGroupHandleReplaySize			= rayTracingProperties->getShaderGroupHandleCaptureReplaySize();
596 	const auto							allGroupOffsets						= m_pipelineTree.getGroupOffsets();
597 
598 	// Make sure we only replay in CAPTURE_REPLAY handles mode.
599 	// When checking capture/replay handles, the first iteration will save the handles to m_captureReplayHandles.
600 	// In the second iteration, the replay argument will be true and we'll use the saved m_captureReplayHandles when creating pipelines.
601 	if (replay)
602 		DE_ASSERT(m_data.includesCaptureReplay());
603 
604 	const Move<VkDescriptorSetLayout>	descriptorSetLayout					= DescriptorSetLayoutBuilder()
605 																					.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
606 																					.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
607 																					.build(vkd, device);
608 	const Move<VkDescriptorPool>		descriptorPool						= DescriptorPoolBuilder()
609 																					.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
610 																					.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
611 																					.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
612 	const Move<VkDescriptorSet>			descriptorSet						= makeDescriptorSet(vkd, device, *descriptorPool, *descriptorSetLayout);
613 	const Move<VkPipelineLayout>		pipelineLayout						= makePipelineLayout(vkd, device, descriptorSetLayout.get());
614 
615 	// sort pipeline library configurations ( including main pipeline )
616 	std::vector<std::tuple<int, deUint32, deUint32>> pipelineInfoList;
617 	{
618 		// push main pipeline on the list
619 		deUint32 shaderOffset	= 0U;
620 		pipelineInfoList.push_back(std::make_tuple(-1, shaderOffset, m_data.libraryConfiguration.pipelineShaders));
621 		shaderOffset			+= m_data.libraryConfiguration.pipelineShaders;
622 
623 		for (size_t i = 0; i < m_data.libraryConfiguration.pipelineLibraries.size(); ++i)
624 		{
625 			int parentIndex			= m_data.libraryConfiguration.pipelineLibraries[i].x();
626 			deUint32 shaderCount	= deUint32(m_data.libraryConfiguration.pipelineLibraries[i].y());
627 			if (parentIndex < 0 || parentIndex >= int(pipelineInfoList.size()) )
628 				TCU_THROW(InternalError, "Wrong library tree definition");
629 			pipelineInfoList.push_back(std::make_tuple(parentIndex, shaderOffset, shaderCount));
630 			shaderOffset			+= shaderCount;
631 		}
632 	}
633 
634 	// create pipeline libraries and build a pipeline tree.
635 	std::vector<de::SharedPtr<de::MovePtr<RayTracingPipeline>>>					rtPipelines(pipelineInfoList.size());
636 	std::vector<std::vector<std::tuple<std::string, VkShaderStageFlagBits>>>	pipelineShaders(pipelineInfoList.size());
637 	for (size_t idx=0; idx < pipelineInfoList.size(); ++idx)
638 	{
639 		int			parentIndex;
640 		deUint32	shaderCount, shaderOffset;
641 		std::tie(parentIndex, shaderOffset, shaderCount) = pipelineInfoList[idx];
642 
643 		// create pipeline objects
644 		de::SharedPtr<de::MovePtr<RayTracingPipeline>> rtPipeline = makeVkSharedPtr(de::MovePtr<RayTracingPipeline>(new RayTracingPipeline));
645 
646 		(*rtPipeline)->setDeferredOperation(m_data.pipelinesCreatedUsingDHO);
647 
648 		VkPipelineCreateFlags creationFlags = 0u;
649 
650 		// all pipelines are pipeline libraries, except for the main pipeline
651 		if (idx > 0)
652 			creationFlags |= VK_PIPELINE_CREATE_LIBRARY_BIT_KHR;
653 
654 		// Sometimes we need capture/replay handles.
655 		if (m_data.includesCaptureReplay())
656 			creationFlags |= VK_PIPELINE_CREATE_RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR;
657 
658 		if (m_data.useLinkTimeOptimizations)
659 		{
660 			if (m_data.retainLinkTimeOptimizations)
661 				creationFlags |= VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT;
662 			else
663 				creationFlags |= VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT;
664 		}
665 
666 		rtPipeline->get()->setCreateFlags(creationFlags);
667 		if (m_data.useMaintenance5)
668 			rtPipeline->get()->setCreateFlags2(translateCreateFlag(creationFlags));
669 
670 		rtPipeline->get()->setMaxPayloadSize(16U); // because rayPayloadInEXT is uvec4 ( = 16 bytes ) for all chit shaders
671 		rtPipelines[idx] = rtPipeline;
672 
673 		// prepare all shader names for all pipelines
674 		if (idx == 0)
675 		{
676 			pipelineShaders[0].push_back(std::make_tuple( "rgen", VK_SHADER_STAGE_RAYGEN_BIT_KHR ));
677 			pipelineShaders[0].push_back(std::make_tuple( "miss", VK_SHADER_STAGE_MISS_BIT_KHR ));
678 		}
679 		for (uint32_t i = 0; i < shaderCount; ++i)
680 		{
681 			std::stringstream csname;
682 			csname << "chit" << shaderOffset + i;
683 			pipelineShaders[idx].push_back(std::make_tuple( csname.str(), VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR ));
684 		}
685 	}
686 
687 	const auto isecMod	= (m_data.useAABBs
688 						? createShaderModule(vkd, device, m_context.getBinaryCollection().get("isec"))
689 						: Move<VkShaderModule>());
690 
691 	// singlethreaded / multithreaded compilation of all shaders
692 	if (m_data.multithreadedCompilation)
693 	{
694 		std::vector<CompileShadersMultithreadData> csmds;
695 		for (deUint32 i = 0; i < rtPipelines.size(); ++i)
696 			csmds.push_back(CompileShadersMultithreadData{ m_context, rtPipelines[i], pipelineShaders[i], isecMod });
697 
698 		std::vector<deThread>	threads;
699 		for (deUint32 i = 0; i < csmds.size(); ++i)
700 			threads.push_back(deThread_create(compileShadersThread, (void*)&csmds[i], DE_NULL));
701 
702 		for (deUint32 i = 0; i < threads.size(); ++i)
703 		{
704 			deThread_join(threads[i]);
705 			deThread_destroy(threads[i]);
706 		}
707 	}
708 	else // m_data.multithreadedCompilation == false
709 	{
710 		for (deUint32 i = 0; i < rtPipelines.size(); ++i)
711 			compileShaders(m_context, rtPipelines[i], pipelineShaders[i], isecMod);
712 	}
713 
714 	// connect libraries into a tree structure
715 	for (size_t idx = 0; idx < pipelineInfoList.size(); ++idx)
716 	{
717 		int			parentIndex;
718 		deUint32 shaderCount, shaderOffset;
719 		std::tie(parentIndex, shaderOffset, shaderCount) = pipelineInfoList[idx];
720 		if (parentIndex != -1)
721 			rtPipelines[parentIndex]->get()->addLibrary(rtPipelines[idx]);
722 	}
723 
724 	// Add the saved capture/replay handles when in replay mode.
725 	if (replay)
726 	{
727 		for (size_t pipelineIdx = 0; pipelineIdx < rtPipelines.size(); ++pipelineIdx)
728 		{
729 			const auto pipelineOffsetBytes = allGroupOffsets.at(pipelineIdx) * shaderGroupHandleReplaySize;
730 			for (size_t groupIdx = 0; groupIdx < pipelineShaders.at(pipelineIdx).size(); ++groupIdx)
731 			{
732 				const auto groupOffsetBytes = pipelineOffsetBytes + groupIdx * shaderGroupHandleReplaySize;
733 				rtPipelines[pipelineIdx]->get()->setGroupCaptureReplayHandle(static_cast<uint32_t>(groupIdx), &m_captureReplayHandles.at(groupOffsetBytes));
734 			}
735 		}
736 	}
737 
738 	// build main pipeline and all pipeline libraries that it depends on
739 	const auto										firstRTPipeline	= rtPipelines.at(0)->get();
740 	std::vector<de::SharedPtr<Move<VkPipeline>>>	pipelines		= firstRTPipeline->createPipelineWithLibraries(vkd, device, *pipelineLayout);
741 	const VkPipeline								pipeline		= pipelines.at(0)->get();
742 
743 	// Obtain and verify shader group handles.
744 	if (m_data.testType != TestType::DEFAULT)
745 	{
746 		// When checking all handles, we'll do two iterations, checking the normal handles first and the capture/replay handles later.
747 		const bool					checkAllHandles	= (m_data.testType == TestType::CHECK_ALL_HANDLES);
748 		const uint32_t				iterations		= (checkAllHandles ? 2u : 1u);
749 
750 		for (uint32_t iter = 0u; iter < iterations; ++iter)
751 		{
752 			const bool					normalHandles	= (iter == 0u && m_data.testType != TestType::CHECK_CAPTURE_REPLAY_HANDLES);
753 			const auto					handleSize		= (normalHandles ? shaderGroupHandleSize : shaderGroupHandleReplaySize);
754 			de::MovePtr<HandleGetter>	handleGetter	(normalHandles
755 														? static_cast<HandleGetter*>(new NormalHandleGetter(handleSize))
756 														: static_cast<HandleGetter*>(new CaptureReplayHandleGetter(handleSize)));
757 
758 			const auto allHandles		= handleGetter->getShaderGroupHandlesVector(firstRTPipeline, vkd, device, pipeline, 0u, firstRTPipeline->getFullShaderGroupCount());
759 			const auto allGroupCounts	= getAllGroupCounts(rtPipelines);
760 
761 			DE_ASSERT(allGroupOffsets.size() == rtPipelines.size());
762 			DE_ASSERT(allGroupCounts.size() == rtPipelines.size());
763 			DE_ASSERT(rtPipelines.size() == pipelines.size());
764 
765 			for (size_t idx = 0; idx < rtPipelines.size(); ++idx)
766 			{
767 				const auto	curRTPipeline	= rtPipelines[idx]->get();
768 				const auto&	curPipeline		= pipelines[idx]->get();
769 				const auto&	curGroupOffset	= allGroupOffsets[idx];
770 				const auto& curGroupCount	= allGroupCounts[idx];
771 				const auto	curHandles		= handleGetter->getShaderGroupHandlesVector(curRTPipeline, vkd, device, curPipeline, 0u, curGroupCount);
772 
773 				const auto	rangeStart		= curGroupOffset * shaderGroupHandleSize;
774 				const auto	rangeEnd		= (curGroupOffset + curGroupCount) * shaderGroupHandleSize;
775 
776 				const std::vector<uint8_t> handleRange (allHandles.begin() + rangeStart, allHandles.begin() + rangeEnd);
777 				if (handleRange != curHandles)
778 				{
779 					std::ostringstream msg;
780 					msg << (normalHandles ? "" : "Capture Replay ") << "Shader Group Handle verification failed for pipeline " << idx;
781 					TCU_FAIL(msg.str());
782 				}
783 			}
784 
785 			// Save or check capture/replay handles.
786 			if (!normalHandles)
787 			{
788 				if (replay)
789 				{
790 					// Check saved handles.
791 					if (allHandles != m_captureReplayHandles)
792 						TCU_FAIL("Capture Replay Shader Group Handles do not match creation handles for top-level pipeline");
793 				}
794 				else
795 				{
796 					// Save handles for the replay phase.
797 					m_captureReplayHandles = allHandles;
798 				}
799 			}
800 		}
801 	}
802 
803 	// build shader binding tables
804 	const de::MovePtr<BufferWithMemory>		raygenShaderBindingTable			= firstRTPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1 );
805 	const de::MovePtr<BufferWithMemory>		missShaderBindingTable				= firstRTPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1 );
806 	const de::MovePtr<BufferWithMemory>		hitShaderBindingTable				= firstRTPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 2, hitGroupCount);
807 	const VkStridedDeviceAddressRegionKHR	raygenShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, raygenShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
808 	const VkStridedDeviceAddressRegionKHR	missShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
809 	const VkStridedDeviceAddressRegionKHR	hitShaderBindingTableRegion			= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitShaderBindingTable->get(), 0), shaderGroupHandleSize, hitGroupCount * shaderGroupHandleSize);
810 	const VkStridedDeviceAddressRegionKHR	callableShaderBindingTableRegion	= makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
811 
812 	const VkFormat						imageFormat							= VK_FORMAT_R32_UINT;
813 	const VkImageCreateInfo				imageCreateInfo						= makeImageCreateInfo(m_data.width, m_data.height, imageFormat);
814 	const VkImageSubresourceRange		imageSubresourceRange				= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
815 	const de::MovePtr<ImageWithMemory>	image								= de::MovePtr<ImageWithMemory>(new ImageWithMemory(vkd, device, allocator, imageCreateInfo, MemoryRequirement::Any));
816 	const Move<VkImageView>				imageView							= makeImageView(vkd, device, **image, VK_IMAGE_VIEW_TYPE_2D, imageFormat, imageSubresourceRange);
817 
818 	const VkBufferCreateInfo			resultBufferCreateInfo				= makeBufferCreateInfo(pixelCount*sizeof(deUint32), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
819 	const VkImageSubresourceLayers		resultBufferImageSubresourceLayers	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
820 	const VkBufferImageCopy				resultBufferImageRegion				= makeBufferImageCopy(makeExtent3D(m_data.width, m_data.height, 1), resultBufferImageSubresourceLayers);
821 	de::MovePtr<BufferWithMemory>		resultBuffer						= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible));
822 	auto&								resultBufferAlloc					= resultBuffer->getAllocation();
823 
824 	const VkDescriptorImageInfo			descriptorImageInfo					= makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
825 
826 	const Move<VkCommandPool>			cmdPool								= createCommandPool(vkd, device, 0, queueFamilyIndex);
827 	const Move<VkCommandBuffer>			cmdBuffer							= allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
828 
829 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >	bottomLevelAccelerationStructures;
830 	de::MovePtr<TopLevelAccelerationStructure>						topLevelAccelerationStructure;
831 
832 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
833 	{
834 		const VkImageMemoryBarrier			preImageBarrier						= makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT,
835 																					VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
836 																					**image, imageSubresourceRange);
837 		cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &preImageBarrier);
838 
839 		const VkClearValue					clearValue							= makeClearValueColorU32(0xFF, 0u, 0u, 0u);
840 		vkd.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1, &imageSubresourceRange);
841 
842 		const VkImageMemoryBarrier			postImageBarrier					= makeImageMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
843 																					VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL,
844 																					**image, imageSubresourceRange);
845 		cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, &postImageBarrier);
846 
847 		bottomLevelAccelerationStructures	= initBottomAccelerationStructures(*cmdBuffer);
848 		topLevelAccelerationStructure		= initTopAccelerationStructure(*cmdBuffer, bottomLevelAccelerationStructures);
849 
850 		const TopLevelAccelerationStructure*			topLevelAccelerationStructurePtr		= topLevelAccelerationStructure.get();
851 		VkWriteDescriptorSetAccelerationStructureKHR	accelerationStructureWriteDescriptorSet	=
852 		{
853 			VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	//  VkStructureType						sType;
854 			DE_NULL,															//  const void*							pNext;
855 			1u,																	//  deUint32							accelerationStructureCount;
856 			topLevelAccelerationStructurePtr->getPtr(),							//  const VkAccelerationStructureKHR*	pAccelerationStructures;
857 		};
858 
859 		DescriptorSetUpdateBuilder()
860 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
861 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
862 			.update(vkd, device);
863 
864 		vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL);
865 
866 		vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, pipeline);
867 
868 		cmdTraceRays(vkd,
869 			*cmdBuffer,
870 			&raygenShaderBindingTableRegion,
871 			&missShaderBindingTableRegion,
872 			&hitShaderBindingTableRegion,
873 			&callableShaderBindingTableRegion,
874 			m_data.width, m_data.height, 1);
875 
876 		const VkMemoryBarrier							postTraceMemoryBarrier					= makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
877 		const VkMemoryBarrier							postCopyMemoryBarrier					= makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
878 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_TRANSFER_BIT, &postTraceMemoryBarrier);
879 
880 		vkd.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **resultBuffer, 1u, &resultBufferImageRegion);
881 
882 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &postCopyMemoryBarrier);
883 	}
884 	endCommandBuffer(vkd, *cmdBuffer);
885 
886 	submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
887 
888 	invalidateAlloc(vkd, device, resultBufferAlloc);
889 
890 	std::vector<uint32_t> resultVector (pixelCount);
891 	deMemcpy(resultVector.data(), resultBufferAlloc.getHostPtr(), de::dataSize(resultVector));
892 
893 	return resultVector;
894 }
895 
iterate(void)896 tcu::TestStatus RayTracingPipelineLibraryTestInstance::iterate (void)
897 {
898 	// run test using arrays of pointers
899 	const auto	numShadersUsed	= m_data.getHitGroupCount();
900 	const auto	bufferVec		= runTest();
901 
902 	if (m_data.includesCaptureReplay())
903 	{
904 		const auto replayResults = runTest(true/*replay*/);
905 		if (bufferVec != replayResults)
906 			return tcu::TestStatus::fail("Replay results differ from original results");
907 	}
908 
909 	deUint32	failures		= 0;
910 	deUint32	pos				= 0;
911 	deUint32	shaderIdx		= 0;
912 
913 	// Verify results.
914 	for (deUint32 y = 0; y < m_data.height; ++y)
915 		for (deUint32 x = 0; x < m_data.width; ++x)
916 		{
917 			deUint32 expectedResult;
918 			if ((x + y) % 2)
919 			{
920 				expectedResult = shaderIdx % numShadersUsed;
921 				++shaderIdx;
922 			}
923 			else
924 				expectedResult = RTPL_MAX_CHIT_SHADER_COUNT;
925 
926 			if (bufferVec.at(pos) != expectedResult)
927 				failures++;
928 
929 			++pos;
930 		}
931 
932 	if (failures == 0)
933 		return tcu::TestStatus::pass("Pass");
934 	else
935 		return tcu::TestStatus::fail("failures=" + de::toString(failures));
936 }
937 
938 }	// anonymous
939 
addPipelineLibraryConfigurationsTests(tcu::TestCaseGroup * group)940 void addPipelineLibraryConfigurationsTests (tcu::TestCaseGroup* group)
941 {
942 	struct ThreadData
943 	{
944 		bool									multithreaded;
945 		bool									pipelinesCreatedUsingDHO;
946 		const char*								name;
947 	} threadData[] =
948 	{
949 		{ false,	false,	"singlethreaded_compilation"	},
950 		{ true,		false,	"multithreaded_compilation"		},
951 		{ true,		true,	"multithreaded_compilation_dho"	},
952 	};
953 
954 	struct LibraryConfigurationData
955 	{
956 		LibraryConfiguration		libraryConfiguration;
957 		const char*					name;
958 	} libraryConfigurationData[] =
959 	{
960 		{ {0, { { 0, 1 } } },								"s0_l1"			},	// 0 shaders in a main pipeline. 1 pipeline library with 1 shader
961 		{ {1, { { 0, 1 } } },								"s1_l1"			},	// 1 shader  in a main pipeline. 1 pipeline library with 1 shader
962 		{ {0, { { 0, 1 }, { 0, 1 } } },						"s0_l11"		},	// 0 shaders in a main pipeline. 2 pipeline libraries with 1 shader each
963 		{ {3, { { 0, 1 }, { 0, 1 } } },						"s3_l11"		},	// 3 shaders in a main pipeline. 2 pipeline libraries with 1 shader each
964 		{ {0, { { 0, 2 }, { 0, 3 } } },						"s0_l23"		},	// 0 shaders in a main pipeline. 2 pipeline libraries with 2 and 3 shaders respectively
965 		{ {2, { { 0, 2 }, { 0, 3 } } },						"s2_l23"		},	// 2 shaders in a main pipeline. 2 pipeline libraries with 2 and 3 shaders respectively
966 		{ {0, { { 0, 1 }, { 1, 1 } } },						"s0_l1_l1"		},	// 0 shaders in a main pipeline. 2 pipeline libraries with 1 shader each. Second library is a child of a first library
967 		{ {1, { { 0, 1 }, { 1, 1 } } },						"s1_l1_l1"		},	// 1 shader  in a main pipeline. 2 pipeline libraries with 1 shader each. Second library is a child of a first library
968 		{ {0, { { 0, 2 }, { 1, 3 } } },						"s0_l2_l3"		},	// 0 shaders in a main pipeline. 2 pipeline libraries with 2 and 3 shaders respectively. Second library is a child of a first library
969 		{ {3, { { 0, 2 }, { 1, 3 } } },						"s3_l2_l3"		},	// 3 shaders in a main pipeline. 2 pipeline libraries with 2 and 3 shaders respectively. Second library is a child of a first library
970 		{ {3, { { 0, 2 }, { 0, 3 }, { 0, 2 } } },			"s3_l232"		},	// 3 shaders in a main pipeline. 3 pipeline libraries with 2, 3 and 2 shaders respectively.
971 		{ {3, { { 0, 2 }, { 1, 2 }, { 1, 2 }, { 0, 2 } } },	"s3_l22_l22"	},	// 3 shaders in a main pipeline. 4 pipeline libraries with 2 shaders each. Second and third library is a child of a first library
972 	};
973 
974 	struct
975 	{
976 		const TestType	testType;
977 		const char*		suffix;
978 	} testTypeCases[] =
979 	{
980 		{ TestType::DEFAULT,						""									},
981 		{ TestType::CHECK_GROUP_HANDLES,			"_check_group_handles"				},
982 		{ TestType::CHECK_CAPTURE_REPLAY_HANDLES,	"_check_capture_replay_handles"		},
983 		{ TestType::CHECK_ALL_HANDLES,				"_check_all_handles"				},
984 	};
985 
986 	struct
987 	{
988 		const bool		useAABBs;
989 		const char*		suffix;
990 	} geometryTypeCases[] =
991 	{
992 		{ false,	""			},
993 		{ true,		"_aabbs"	},
994 	};
995 
996 	for (size_t threadNdx = 0; threadNdx < DE_LENGTH_OF_ARRAY(threadData); ++threadNdx)
997 	{
998 		de::MovePtr<tcu::TestCaseGroup> threadGroup(new tcu::TestCaseGroup(group->getTestContext(), threadData[threadNdx].name));
999 
1000 		for (size_t libConfigNdx = 0; libConfigNdx < DE_LENGTH_OF_ARRAY(libraryConfigurationData); ++libConfigNdx)
1001 		{
1002 			for (const auto& testTypeCase : testTypeCases)
1003 			{
1004 				for (const auto& geometryCase : geometryTypeCases)
1005 				{
1006 					TestParams testParams
1007 					{
1008 						libraryConfigurationData[libConfigNdx].libraryConfiguration,
1009 						threadData[threadNdx].multithreaded,
1010 						threadData[threadNdx].pipelinesCreatedUsingDHO,
1011 						testTypeCase.testType,
1012 						geometryCase.useAABBs,
1013 						false,
1014 						false,
1015 						false,
1016 						RTPL_DEFAULT_SIZE,
1017 						RTPL_DEFAULT_SIZE
1018 					};
1019 
1020 					const std::string testName = std::string(libraryConfigurationData[libConfigNdx].name) + geometryCase.suffix + testTypeCase.suffix;
1021 					threadGroup->addChild(new RayTracingPipelineLibraryTestCase(group->getTestContext(), testName.c_str(), testParams));
1022 				}
1023 			}
1024 		}
1025 		group->addChild(threadGroup.release());
1026 	}
1027 
1028 	{
1029 		de::MovePtr<tcu::TestCaseGroup> miscGroup(new tcu::TestCaseGroup(group->getTestContext(), "misc", ""));
1030 
1031 		TestParams testParamsMaintenance5
1032 		{
1033 			libraryConfigurationData[1].libraryConfiguration,
1034 			false,
1035 			false,
1036 			TestType::CHECK_CAPTURE_REPLAY_HANDLES,
1037 			false,
1038 			true,
1039 			false,
1040 			true,
1041 			RTPL_DEFAULT_SIZE,
1042 			RTPL_DEFAULT_SIZE
1043 		};
1044 		miscGroup->addChild(new RayTracingPipelineLibraryTestCase(group->getTestContext(), "maintenance5", testParamsMaintenance5));
1045 
1046 		TestParams testParamsUseLinkTimeOpt
1047 		{
1048 			libraryConfigurationData[5].libraryConfiguration,
1049 			false,
1050 			false,
1051 			TestType::DEFAULT,
1052 			true,
1053 			true,
1054 			false,
1055 			false,
1056 			RTPL_DEFAULT_SIZE,
1057 			RTPL_DEFAULT_SIZE
1058 		};
1059 		miscGroup->addChild(new RayTracingPipelineLibraryTestCase(group->getTestContext(), "use_link_time_optimizations", testParamsUseLinkTimeOpt));
1060 
1061 		TestParams testParamsRetainLinkTimeOpt
1062 		{
1063 			libraryConfigurationData[5].libraryConfiguration,
1064 			false,
1065 			false,
1066 			TestType::DEFAULT,
1067 			true,
1068 			true,
1069 			true,
1070 			false,
1071 			RTPL_DEFAULT_SIZE,
1072 			RTPL_DEFAULT_SIZE
1073 		};
1074 		miscGroup->addChild(new RayTracingPipelineLibraryTestCase(group->getTestContext(), "retain_link_time_optimizations", testParamsRetainLinkTimeOpt));
1075 
1076 		group->addChild(miscGroup.release());
1077 	}
1078 }
1079 
createPipelineLibraryTests(tcu::TestContext & testCtx)1080 tcu::TestCaseGroup*	createPipelineLibraryTests(tcu::TestContext& testCtx)
1081 {
1082 	de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "pipeline_library"));
1083 
1084 	addTestGroup(group.get(), "configurations", addPipelineLibraryConfigurationsTests);
1085 
1086 	return group.release();
1087 }
1088 
1089 }	// RayTracing
1090 
1091 }	// vkt
1092