1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2020 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *	  http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Ray Tracing Acceleration Structures tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktRayTracingAccelerationStructuresTests.hpp"
25 
26 #include "vkDefs.hpp"
27 #include "deClock.h"
28 #include "deRandom.h"
29 
30 #include "vktTestCase.hpp"
31 #include "vktTestGroupUtil.hpp"
32 #include "vkCmdUtil.hpp"
33 #include "vkObjUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkBarrierUtil.hpp"
36 #include "vkBufferWithMemory.hpp"
37 #include "vkImageWithMemory.hpp"
38 #include "vkTypeUtil.hpp"
39 #include "vkImageUtil.hpp"
40 #include "vkRayTracingUtil.hpp"
41 #include "tcuVectorUtil.hpp"
42 #include "tcuTexture.hpp"
43 #include "tcuTestLog.hpp"
44 #include "tcuImageCompare.hpp"
45 #include "tcuFloat.hpp"
46 
47 #include <cmath>
48 #include <cstddef>
49 #include <set>
50 #include <limits>
51 #include <iostream>
52 
53 namespace vkt
54 {
55 namespace RayTracing
56 {
57 namespace
58 {
59 using namespace vk;
60 using namespace vkt;
61 using namespace tcu;
62 
63 static const VkFlags	ALL_RAY_TRACING_STAGES	= VK_SHADER_STAGE_RAYGEN_BIT_KHR
64 												| VK_SHADER_STAGE_ANY_HIT_BIT_KHR
65 												| VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR
66 												| VK_SHADER_STAGE_MISS_BIT_KHR
67 												| VK_SHADER_STAGE_INTERSECTION_BIT_KHR
68 												| VK_SHADER_STAGE_CALLABLE_BIT_KHR;
69 
70 
71 enum BottomTestType
72 {
73 	BTT_TRIANGLES,
74 	BTT_AABBS
75 };
76 
77 enum TopTestType
78 {
79 	TTT_IDENTICAL_INSTANCES,
80 	TTT_DIFFERENT_INSTANCES,
81 	TTT_MIX_INSTANCES,
82 };
83 
84 enum OperationTarget
85 {
86 	OT_NONE,
87 	OT_TOP_ACCELERATION,
88 	OT_BOTTOM_ACCELERATION
89 };
90 
91 enum OperationType
92 {
93 	OP_NONE,
94 	OP_COPY,
95 	OP_COMPACT,
96 	OP_SERIALIZE
97 };
98 
99 enum class InstanceCullFlags
100 {
101 	NONE,
102 	CULL_DISABLE,
103 	COUNTERCLOCKWISE,
104 	ALL,
105 };
106 
107 enum class EmptyAccelerationStructureCase
108 {
109 	NOT_EMPTY				= 0,
110 	INACTIVE_TRIANGLES		= 1,
111 	INACTIVE_INSTANCES		= 2,
112 	NO_GEOMETRIES_BOTTOM	= 3,	// geometryCount zero when building.
113 	NO_PRIMITIVES_BOTTOM	= 4,	// primitiveCount zero when building.
114 	NO_PRIMITIVES_TOP		= 5,	// primitiveCount zero when building.
115 };
116 
117 enum class InstanceCustomIndexCase
118 {
119 	NONE			= 0,
120 	CLOSEST_HIT		= 1,
121 	ANY_HIT			= 2,
122 	INTERSECTION	= 3,
123 };
124 
125 static const deUint32 RTAS_DEFAULT_SIZE = 8u;
126 
127 // Chosen to have the most significant bit set to 1 when represented using 24 bits.
128 // This will make sure the instance custom index will not be sign-extended by mistake.
129 constexpr deUint32 INSTANCE_CUSTOM_INDEX_BASE = 0x807f00u;
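// For illustration: the instanceCustomIndex field of VkAccelerationStructureInstanceKHR is a
// 24-bit value, and 0x807f00 has bit 23 set, so an implementation that wrongly sign-extended
// the 24-bit field would return 0xff807f00 instead of 0x00807f00 in gl_InstanceCustomIndexEXT,
// which the result comparison below would catch.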
130 
131 struct TestParams;
132 
133 class TestConfiguration
134 {
135 public:
136 	virtual std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	initBottomAccelerationStructures	(Context&							context,
137 																												 TestParams&						testParams) = 0;
138 	virtual de::MovePtr<TopLevelAccelerationStructure>						initTopAccelerationStructure		(Context&							context,
139 																												 TestParams&						testParams,
140 																												 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >&	bottomLevelAccelerationStructures) = 0;
141 	virtual void															initRayTracingShaders				(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
142 																												 Context&							context,
143 																												TestParams&							testParams) = 0;
144 	virtual void															initShaderBindingTables				(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
145 																												 Context&							context,
146 																												 TestParams&						testParams,
147 																												 VkPipeline							pipeline,
148 																												 deUint32							shaderGroupHandleSize,
149 																												 deUint32							shaderGroupBaseAlignment,
150 																												 de::MovePtr<BufferWithMemory>&		raygenShaderBindingTable,
151 																												 de::MovePtr<BufferWithMemory>&		hitShaderBindingTable,
152 																												 de::MovePtr<BufferWithMemory>&		missShaderBindingTable) = 0;
153 	virtual bool															verifyImage							(BufferWithMemory*					resultBuffer,
154 																												 Context&							context,
155 																												 TestParams&						testParams) = 0;
156 	virtual VkFormat														getResultImageFormat				() = 0;
157 	virtual size_t															getResultImageFormatSize			() = 0;
158 	virtual VkClearValue													getClearValue						() = 0;
159 };
160 
161 struct TestParams
162 {
163 	vk::VkAccelerationStructureBuildTypeKHR	buildType;		// Whether the AS is built on the host (CPU) or on the device (GPU).
164 	VkFormat								vertexFormat;
165 	bool									padVertices;
166 	VkIndexType								indexType;
167 	BottomTestType							bottomTestType; // Kind of geometry stored in the bottom AS (triangles or AABBs).
168 	InstanceCullFlags						cullFlags;		// Flags for instances, if needed.
169 	bool									bottomUsesAOP;	// Whether the bottom AS geometries are specified as an array of pointers (rather than a plain array).
170 	bool									bottomGeneric;	// Bottom AS created as generic AS type.
171 	TopTestType								topTestType;	// If instances are identical, the bottom geometries must have different vertices/AABBs.
172 	bool									topUsesAOP;		// Whether the top AS instances are specified as an array of pointers (rather than a plain array).
173 	bool									topGeneric;		// Top AS created as generic AS type.
174 	VkBuildAccelerationStructureFlagsKHR	buildFlags;
175 	OperationTarget							operationTarget;
176 	OperationType							operationType;
177 	deUint32								width;
178 	deUint32								height;
179 	de::SharedPtr<TestConfiguration>		testConfiguration;
180 	deUint32								workerThreadsCount;
181 	EmptyAccelerationStructureCase			emptyASCase;
182 	InstanceCustomIndexCase					instanceCustomIndexCase;
183 	bool									useCullMask;
184 	uint32_t								cullMask;
185 };
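// Illustrative sketch only (not taken from the actual test-construction code): a typical
// GPU-built checkerboard case could be parameterized roughly like this, with the remaining
// members left value-initialized:
//
//   TestParams params{};
//   params.buildType         = VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR;
//   params.vertexFormat      = VK_FORMAT_R32G32B32_SFLOAT;
//   params.bottomTestType    = BTT_TRIANGLES;
//   params.topTestType       = TTT_IDENTICAL_INSTANCES;
//   params.width             = RTAS_DEFAULT_SIZE;
//   params.height            = RTAS_DEFAULT_SIZE;
//   params.testConfiguration = de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration());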
186 
187 deUint32 getShaderGroupSize (const InstanceInterface&	vki,
188 							 const VkPhysicalDevice		physicalDevice)
189 {
190 	de::MovePtr<RayTracingProperties>	rayTracingPropertiesKHR;
191 
192 	rayTracingPropertiesKHR	= makeRayTracingProperties(vki, physicalDevice);
193 	return rayTracingPropertiesKHR->getShaderGroupHandleSize();
194 }
195 
196 deUint32 getShaderGroupBaseAlignment (const InstanceInterface&	vki,
197 									  const VkPhysicalDevice	physicalDevice)
198 {
199 	de::MovePtr<RayTracingProperties>	rayTracingPropertiesKHR;
200 
201 	rayTracingPropertiesKHR = makeRayTracingProperties(vki, physicalDevice);
202 	return rayTracingPropertiesKHR->getShaderGroupBaseAlignment();
203 }
204 
205 VkImageCreateInfo makeImageCreateInfo (deUint32 width, deUint32 height, VkFormat format)
206 {
207 	const VkImageCreateInfo			imageCreateInfo			=
208 	{
209 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,																// VkStructureType			sType;
210 		DE_NULL,																							// const void*				pNext;
211 		(VkImageCreateFlags)0u,																				// VkImageCreateFlags		flags;
212 		VK_IMAGE_TYPE_2D,																					// VkImageType				imageType;
213 		format,																								// VkFormat					format;
214 		makeExtent3D(width, height, 1u),																	// VkExtent3D				extent;
215 		1u,																									// deUint32					mipLevels;
216 		1u,																									// deUint32					arrayLayers;
217 		VK_SAMPLE_COUNT_1_BIT,																				// VkSampleCountFlagBits	samples;
218 		VK_IMAGE_TILING_OPTIMAL,																			// VkImageTiling			tiling;
219 		VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT,		// VkImageUsageFlags		usage;
220 		VK_SHARING_MODE_EXCLUSIVE,																			// VkSharingMode			sharingMode;
221 		0u,																									// deUint32					queueFamilyIndexCount;
222 		DE_NULL,																							// const deUint32*			pQueueFamilyIndices;
223 		VK_IMAGE_LAYOUT_UNDEFINED																			// VkImageLayout			initialLayout;
224 	};
225 
226 	return imageCreateInfo;
227 }
228 
229 Move<VkQueryPool> makeQueryPool(const DeviceInterface&		vk,
230 								const VkDevice				device,
231 								const VkQueryType			queryType,
232 								deUint32					queryCount)
233 {
234 	const VkQueryPoolCreateInfo				queryPoolCreateInfo =
235 	{
236 		VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,		// sType
237 		DE_NULL,										// pNext
238 		(VkQueryPoolCreateFlags)0,						// flags
239 		queryType,										// queryType
240 		queryCount,										// queryCount
241 		0u,												// pipelineStatistics
242 	};
243 	return createQueryPool(vk, device, &queryPoolCreateInfo);
244 }
245 
246 VkGeometryInstanceFlagsKHR getCullFlags (InstanceCullFlags flags)
247 {
248 	VkGeometryInstanceFlagsKHR cullFlags = 0u;
249 
250 	if (flags == InstanceCullFlags::CULL_DISABLE || flags == InstanceCullFlags::ALL)
251 		cullFlags |= VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR;
252 
253 	if (flags == InstanceCullFlags::COUNTERCLOCKWISE || flags == InstanceCullFlags::ALL)
254 		cullFlags |= VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR;
255 
256 	return cullFlags;
257 }
258 
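// Renders a width x height grid in which every cell with (x + y) odd contains geometry (a quad
// made of two triangles, or one or more AABBs). The expected output image is therefore a
// checkerboard of "hit" and "miss" values, which verifyImage() checks below.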
259 class CheckerboardConfiguration : public TestConfiguration
260 {
261 public:
262 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	initBottomAccelerationStructures	(Context&							context,
263 																										 TestParams&						testParams) override;
264 	de::MovePtr<TopLevelAccelerationStructure>						initTopAccelerationStructure		(Context&							context,
265 																										 TestParams&						testParams,
266 																										 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >&	bottomLevelAccelerationStructures) override;
267 	void															initRayTracingShaders				(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
268 																										 Context&							context,
269 																										 TestParams&						testParams) override;
270 	void															initShaderBindingTables				(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
271 																										 Context&							context,
272 																										 TestParams&						testParams,
273 																										 VkPipeline							pipeline,
274 																										 deUint32							shaderGroupHandleSize,
275 																										 deUint32							shaderGroupBaseAlignment,
276 																										 de::MovePtr<BufferWithMemory>&		raygenShaderBindingTable,
277 																										 de::MovePtr<BufferWithMemory>&		hitShaderBindingTable,
278 																										 de::MovePtr<BufferWithMemory>&		missShaderBindingTable) override;
279 	bool															verifyImage							(BufferWithMemory*					resultBuffer,
280 																										 Context&							context,
281 																										 TestParams&						testParams) override;
282 	VkFormat														getResultImageFormat				() override;
283 	size_t															getResultImageFormatSize			() override;
284 	VkClearValue													getClearValue						() override;
285 };
286 
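// For TTT_DIFFERENT_INSTANCES a single bottom-level AS containing one unit quad (or unit AABB)
// at the origin is created and reused by every instance; for TTT_IDENTICAL_INSTANCES one
// bottom-level AS per occupied checkerboard cell is created, with the cell offset baked into the
// vertices/AABBs so that identical instance transforms can be used at the top level.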
287 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> > CheckerboardConfiguration::initBottomAccelerationStructures (Context&			context,
288 																														   TestParams&		testParams)
289 {
290 	DE_UNREF(context);
291 
292 	// Cull flags can only be used with triangles.
293 	DE_ASSERT(testParams.cullFlags == InstanceCullFlags::NONE || testParams.bottomTestType == BTT_TRIANGLES);
294 
295 	// Checkerboard configuration does not support empty geometry tests.
296 	DE_ASSERT(testParams.emptyASCase == EmptyAccelerationStructureCase::NOT_EMPTY);
297 
298 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >	result;
299 
300 	const auto instanceFlags = getCullFlags(testParams.cullFlags);
301 
302 	tcu::Vec3 v0(0.0, 1.0, 0.0);
303 	tcu::Vec3 v1(0.0, 0.0, 0.0);
304 	tcu::Vec3 v2(1.0, 1.0, 0.0);
305 	tcu::Vec3 v3(1.0, 0.0, 0.0);
306 
307 	if (testParams.topTestType == TTT_DIFFERENT_INSTANCES)
308 	{
309 		de::MovePtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure = makeBottomLevelAccelerationStructure();
310 		bottomLevelAccelerationStructure->setGeometryCount(1u);
311 		de::SharedPtr<RaytracedGeometryBase> geometry;
312 		if (testParams.bottomTestType == BTT_TRIANGLES)
313 		{
314 			geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_TRIANGLES_KHR, testParams.vertexFormat, testParams.indexType, testParams.padVertices);
315 			if (testParams.indexType == VK_INDEX_TYPE_NONE_KHR)
316 			{
317 				if (instanceFlags == 0u)
318 				{
319 					geometry->addVertex(v0);
320 					geometry->addVertex(v1);
321 					geometry->addVertex(v2);
322 					geometry->addVertex(v2);
323 					geometry->addVertex(v1);
324 					geometry->addVertex(v3);
325 				}
326 				else // Counterclockwise so the flags will be needed for the geometry to be visible.
327 				{
328 					geometry->addVertex(v2);
329 					geometry->addVertex(v1);
330 					geometry->addVertex(v0);
331 					geometry->addVertex(v3);
332 					geometry->addVertex(v1);
333 					geometry->addVertex(v2);
334 				}
335 			}
336 			else // testParams.indexType != VK_INDEX_TYPE_NONE_KHR
337 			{
338 				geometry->addVertex(v0);
339 				geometry->addVertex(v1);
340 				geometry->addVertex(v2);
341 				geometry->addVertex(v3);
342 
343 				if (instanceFlags == 0u)
344 				{
345 					geometry->addIndex(0);
346 					geometry->addIndex(1);
347 					geometry->addIndex(2);
348 					geometry->addIndex(2);
349 					geometry->addIndex(1);
350 					geometry->addIndex(3);
351 				}
352 				else // Counterclockwise so the flags will be needed for the geometry to be visible.
353 				{
354 					geometry->addIndex(2);
355 					geometry->addIndex(1);
356 					geometry->addIndex(0);
357 					geometry->addIndex(3);
358 					geometry->addIndex(1);
359 					geometry->addIndex(2);
360 				}
361 			}
362 		}
363 		else // testParams.bottomTestType == BTT_AABBS
364 		{
365 			geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_AABBS_KHR, testParams.vertexFormat, testParams.indexType, testParams.padVertices);
366 
367 			if (!testParams.padVertices)
368 			{
369 				// Single AABB.
370 				geometry->addVertex(tcu::Vec3(0.0f, 0.0f, -0.1f));
371 				geometry->addVertex(tcu::Vec3(1.0f, 1.0f,  0.1f));
372 			}
373 			else
374 			{
375 				// Multiple AABBs covering the same space.
376 				geometry->addVertex(tcu::Vec3(0.0f, 0.0f, -0.1f));
377 				geometry->addVertex(tcu::Vec3(0.5f, 0.5f,  0.1f));
378 
379 				geometry->addVertex(tcu::Vec3(0.5f, 0.5f, -0.1f));
380 				geometry->addVertex(tcu::Vec3(1.0f, 1.0f,  0.1f));
381 
382 				geometry->addVertex(tcu::Vec3(0.0f, 0.5f, -0.1f));
383 				geometry->addVertex(tcu::Vec3(0.5f, 1.0f,  0.1f));
384 
385 				geometry->addVertex(tcu::Vec3(0.5f, 0.0f, -0.1f));
386 				geometry->addVertex(tcu::Vec3(1.0f, 0.5f,  0.1f));
387 			}
388 		}
389 
390 		bottomLevelAccelerationStructure->addGeometry(geometry);
391 
392 		if (testParams.instanceCustomIndexCase == InstanceCustomIndexCase::ANY_HIT)
393 			geometry->setGeometryFlags(VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR);
394 
395 		result.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(bottomLevelAccelerationStructure.release()));
396 	}
397 	else // testParams.topTestType == TTT_IDENTICAL_INSTANCES
398 	{
399 		// Triangle and AABB tests use geometries/AABBs with different vertex positions and the same identity matrix in each instance.
400 		for (deUint32 y = 0; y < testParams.height; ++y)
401 		for (deUint32 x = 0; x < testParams.width; ++x)
402 		{
403 			// Build a checkerboard of geometries.
404 			if (((x + y) % 2) == 0)
405 				continue;
406 			tcu::Vec3 xyz((float)x, (float)y, 0.0f);
407 
408 			de::MovePtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure = makeBottomLevelAccelerationStructure();
409 			bottomLevelAccelerationStructure->setGeometryCount(1u);
410 
411 			de::SharedPtr<RaytracedGeometryBase> geometry;
412 			if (testParams.bottomTestType == BTT_TRIANGLES)
413 			{
414 				geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_TRIANGLES_KHR, testParams.vertexFormat, testParams.indexType, testParams.padVertices);
415 				if (testParams.indexType == VK_INDEX_TYPE_NONE_KHR)
416 				{
417 					if (instanceFlags == 0u)
418 					{
419 						geometry->addVertex(xyz + v0);
420 						geometry->addVertex(xyz + v1);
421 						geometry->addVertex(xyz + v2);
422 						geometry->addVertex(xyz + v2);
423 						geometry->addVertex(xyz + v1);
424 						geometry->addVertex(xyz + v3);
425 					}
426 					else // Counterclockwise so the flags will be needed for the geometry to be visible.
427 					{
428 						geometry->addVertex(xyz + v2);
429 						geometry->addVertex(xyz + v1);
430 						geometry->addVertex(xyz + v0);
431 						geometry->addVertex(xyz + v3);
432 						geometry->addVertex(xyz + v1);
433 						geometry->addVertex(xyz + v2);
434 					}
435 				}
436 				else
437 				{
438 					geometry->addVertex(xyz + v0);
439 					geometry->addVertex(xyz + v1);
440 					geometry->addVertex(xyz + v2);
441 					geometry->addVertex(xyz + v3);
442 
443 					if (instanceFlags == 0u)
444 					{
445 						geometry->addIndex(0);
446 						geometry->addIndex(1);
447 						geometry->addIndex(2);
448 						geometry->addIndex(2);
449 						geometry->addIndex(1);
450 						geometry->addIndex(3);
451 					}
452 					else // Counterclockwise so the flags will be needed for the geometry to be visible.
453 					{
454 						geometry->addIndex(2);
455 						geometry->addIndex(1);
456 						geometry->addIndex(0);
457 						geometry->addIndex(3);
458 						geometry->addIndex(1);
459 						geometry->addIndex(2);
460 					}
461 				}
462 			}
463 			else // testParams.bottomTestType == BTT_AABBS
464 			{
465 				geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_AABBS_KHR, testParams.vertexFormat, testParams.indexType, testParams.padVertices);
466 
467 				if (!testParams.padVertices)
468 				{
469 					// Single AABB.
470 					geometry->addVertex(xyz + tcu::Vec3(0.0f, 0.0f, -0.1f));
471 					geometry->addVertex(xyz + tcu::Vec3(1.0f, 1.0f,  0.1f));
472 				}
473 				else
474 				{
475 					// Multiple AABBs covering the same space.
476 					geometry->addVertex(xyz + tcu::Vec3(0.0f, 0.0f, -0.1f));
477 					geometry->addVertex(xyz + tcu::Vec3(0.5f, 0.5f,  0.1f));
478 
479 					geometry->addVertex(xyz + tcu::Vec3(0.5f, 0.5f, -0.1f));
480 					geometry->addVertex(xyz + tcu::Vec3(1.0f, 1.0f,  0.1f));
481 
482 					geometry->addVertex(xyz + tcu::Vec3(0.0f, 0.5f, -0.1f));
483 					geometry->addVertex(xyz + tcu::Vec3(0.5f, 1.0f,  0.1f));
484 
485 					geometry->addVertex(xyz + tcu::Vec3(0.5f, 0.0f, -0.1f));
486 					geometry->addVertex(xyz + tcu::Vec3(1.0f, 0.5f,  0.1f));
487 				}
488 			}
489 
490 			bottomLevelAccelerationStructure->addGeometry(geometry);
491 
492 			if (testParams.instanceCustomIndexCase == InstanceCustomIndexCase::ANY_HIT)
493 				geometry->setGeometryFlags(VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR);
494 
495 			result.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(bottomLevelAccelerationStructure.release()));
496 		}
497 	}
498 
499 	return result;
500 }
501 
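// Mirrors initBottomAccelerationStructures(): for TTT_DIFFERENT_INSTANCES the single bottom-level
// AS is instanced once per occupied cell with a per-instance translation, while for
// TTT_IDENTICAL_INSTANCES each per-cell bottom-level AS is instanced with an identity transform.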
502 de::MovePtr<TopLevelAccelerationStructure> CheckerboardConfiguration::initTopAccelerationStructure (Context&		context,
503 																									TestParams&		testParams,
504 																									std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >& bottomLevelAccelerationStructures)
505 {
506 	// Checkerboard configuration does not support empty geometry tests.
507 	DE_ASSERT(testParams.emptyASCase == EmptyAccelerationStructureCase::NOT_EMPTY);
508 
509 	DE_UNREF(context);
510 
511 	const auto instanceCount = testParams.width * testParams.height / 2u;
512 	const auto instanceFlags = getCullFlags(testParams.cullFlags);
513 
514 	de::MovePtr<TopLevelAccelerationStructure>	result = makeTopLevelAccelerationStructure();
515 	result->setInstanceCount(instanceCount);
516 
517 	if (testParams.topTestType == TTT_DIFFERENT_INSTANCES)
518 	{
519 
520 		for (deUint32 y = 0; y < testParams.height; ++y)
521 		for (deUint32 x = 0; x < testParams.width; ++x)
522 		{
523 			if (((x + y) % 2) == 0)
524 				continue;
525 			const VkTransformMatrixKHR			transformMatrixKHR =
526 			{
527 				{								//  float	matrix[3][4];
528 					{ 1.0f, 0.0f, 0.0f, (float)x },
529 					{ 0.0f, 1.0f, 0.0f, (float)y },
530 					{ 0.0f, 0.0f, 1.0f, 0.0f },
531 				}
532 			};
533 			const deUint32 instanceCustomIndex = ((testParams.instanceCustomIndexCase != InstanceCustomIndexCase::NONE) ? (INSTANCE_CUSTOM_INDEX_BASE + x + y) : 0u);
534 			result->addInstance(bottomLevelAccelerationStructures[0], transformMatrixKHR, instanceCustomIndex, 0xFFu, 0u, instanceFlags);
535 		}
536 	}
537 	else // testParams.topTestType == TTT_IDENTICAL_INSTANCES
538 	{
539 		deUint32 currentInstanceIndex = 0;
540 
541 		for (deUint32 y = 0; y < testParams.height; ++y)
542 		for (deUint32 x = 0; x < testParams.width; ++x)
543 		{
544 			if (((x + y) % 2) == 0)
545 				continue;
546 			const deUint32 instanceCustomIndex = ((testParams.instanceCustomIndexCase != InstanceCustomIndexCase::NONE) ? (INSTANCE_CUSTOM_INDEX_BASE + x + y) : 0u);
547 
548 			if (testParams.useCullMask)
549 			{
550 				result->addInstance(bottomLevelAccelerationStructures[currentInstanceIndex++], identityMatrix3x4, instanceCustomIndex, testParams.cullMask, 0u, instanceFlags);
551 			}
552 			else
553 			{
554 				result->addInstance(bottomLevelAccelerationStructures[currentInstanceIndex++], identityMatrix3x4, instanceCustomIndex, 0xFFu, 0u, instanceFlags);
555 			}
556 		}
557 	}
558 
559 	return result;
560 }
561 
562 void CheckerboardConfiguration::initRayTracingShaders(de::MovePtr<RayTracingPipeline>&		rayTracingPipeline,
563 													  Context&								context,
564 													  TestParams&							testParams)
565 {
566 	DE_UNREF(testParams);
567 	const DeviceInterface&						vkd						= context.getDeviceInterface();
568 	const VkDevice								device					= context.getDevice();
569 
570 	const bool useAnyHit		= (testParams.instanceCustomIndexCase == InstanceCustomIndexCase::ANY_HIT);
571 	const auto hitShaderStage	= (useAnyHit ? VK_SHADER_STAGE_ANY_HIT_BIT_KHR : VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR);
572 	const auto hitShaderName	= (useAnyHit ? "ahit" : "chit");
573 
574 	rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,		createShaderModule(vkd, device, context.getBinaryCollection().get("rgen"),  0), 0);
575 	rayTracingPipeline->addShader(hitShaderStage,						createShaderModule(vkd, device, context.getBinaryCollection().get(hitShaderName),  0), 1);
576 	rayTracingPipeline->addShader(hitShaderStage,						createShaderModule(vkd, device, context.getBinaryCollection().get(hitShaderName),  0), 2);
577 	if (testParams.bottomTestType == BTT_AABBS)
578 		rayTracingPipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR,	createShaderModule(vkd, device, context.getBinaryCollection().get("isect"), 0), 2);
579 	rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR,			createShaderModule(vkd, device, context.getBinaryCollection().get("miss"),  0), 3);
580 }
581 
582 void CheckerboardConfiguration::initShaderBindingTables(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
583 														Context&							context,
584 														TestParams&							testParams,
585 														VkPipeline							pipeline,
586 														deUint32							shaderGroupHandleSize,
587 														deUint32							shaderGroupBaseAlignment,
588 														de::MovePtr<BufferWithMemory>&		raygenShaderBindingTable,
589 														de::MovePtr<BufferWithMemory>&		hitShaderBindingTable,
590 														de::MovePtr<BufferWithMemory>&		missShaderBindingTable)
591 {
592 	const DeviceInterface&						vkd						= context.getDeviceInterface();
593 	const VkDevice								device					= context.getDevice();
594 	Allocator&									allocator				= context.getDefaultAllocator();
595 
596 	raygenShaderBindingTable											= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1 );
597 	if(testParams.bottomTestType == BTT_AABBS)
598 		hitShaderBindingTable											= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 2, 1 );
599 	else // testParams.bottomTestType == BTT_TRIANGLES
600 		hitShaderBindingTable											= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1 );
601 	missShaderBindingTable												= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 3, 1 );
602 }
603 
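// CPU-side counterpart of the GLSL bitfieldReverse() call used by the miss shader in the
// cull-mask tests; e.g. bitfieldReverse(0x000000FFu) == 0xFF000000u.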
604 deUint32 bitfieldReverse(deUint32 num)
605 {
606 	deUint32 reverse_num = 0;
607 	deUint32 i;
608 	for (i = 0; i < 32; i++)
609 	{
610 		if ((num & (1u << i)) != 0u)
611 			reverse_num |= 1u << ((32u - 1u) - i);
612 	}
613 	return reverse_num;
614 }
615 
616 bool CheckerboardConfiguration::verifyImage(BufferWithMemory* resultBuffer, Context& context, TestParams& testParams)
617 {
618 	// Checkerboard configuration does not support empty geometry tests.
619 	DE_ASSERT(testParams.emptyASCase == EmptyAccelerationStructureCase::NOT_EMPTY);
620 
621 	DE_UNREF(context);
622 	const auto*						bufferPtr		= (deInt32*)resultBuffer->getAllocation().getHostPtr();
623 	deUint32						pos				= 0;
624 	deUint32						failures		= 0;
625 
626 	// Verify results: each test case should generate a checkerboard pattern.
627 	for (deUint32 y = 0; y < testParams.height; ++y)
628 	for (deUint32 x = 0; x < testParams.width; ++x)
629 	{
630 		// The hit value should match the shader code.
631 		if (testParams.useCullMask)
632 		{
633 			const deInt32 hitValue			= testParams.cullMask & 0x000000FFu;	// Only the lowest 8 bits of the cull mask are used.
634 			const deInt32 expectedResult	= ((x + y) % 2) ? hitValue : bitfieldReverse(testParams.cullMask & 0x000000FFu);
635 
636 			if (bufferPtr[pos] != expectedResult)
637 				failures++;
638 		}
639 		else
640 		{
641 			const deInt32 hitValue			= ((testParams.instanceCustomIndexCase != InstanceCustomIndexCase::NONE) ? static_cast<deInt32>(INSTANCE_CUSTOM_INDEX_BASE + x + y) : 2);
642 			const deInt32 expectedResult	= ((x + y) % 2) ? hitValue : 1;
643 
644 			if (bufferPtr[pos] != expectedResult)
645 				failures++;
646 		}
647 
648 		++pos;
649 	}
650 	return failures == 0;
651 }
652 
653 VkFormat CheckerboardConfiguration::getResultImageFormat()
654 {
655 	return VK_FORMAT_R32_SINT;
656 }
657 
658 size_t CheckerboardConfiguration::getResultImageFormatSize()
659 {
660 	return sizeof(deUint32);
661 }
662 
663 VkClearValue CheckerboardConfiguration::getClearValue()
664 {
665 	return makeClearValueColorU32(0xFF, 0u, 0u, 0u);
666 }
667 
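// Traces one ray per pixel towards a single triangle and compares the reported hit distance
// (gl_RayTmaxEXT, written by the chit_depth shader) against an analytically computed reference.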
668 class SingleTriangleConfiguration : public TestConfiguration
669 {
670 public:
671 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	initBottomAccelerationStructures	(Context&							context,
672 																										 TestParams&						testParams) override;
673 	de::MovePtr<TopLevelAccelerationStructure>						initTopAccelerationStructure		(Context&							context,
674 																										 TestParams&						testParams,
675 																										 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >&	bottomLevelAccelerationStructures) override;
676 	void															initRayTracingShaders				(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
677 																										 Context&							context,
678 																										 TestParams&						testParams) override;
679 	void															initShaderBindingTables				(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
680 																										 Context&							context,
681 																										 TestParams&						testParams,
682 																										 VkPipeline							pipeline,
683 																										 deUint32							shaderGroupHandleSize,
684 																										 deUint32							shaderGroupBaseAlignment,
685 																										 de::MovePtr<BufferWithMemory>&		raygenShaderBindingTable,
686 																										 de::MovePtr<BufferWithMemory>&		hitShaderBindingTable,
687 																										 de::MovePtr<BufferWithMemory>&		missShaderBindingTable) override;
688 	bool															verifyImage							(BufferWithMemory*					resultBuffer,
689 																										 Context&							context,
690 																										 TestParams&						testParams) override;
691 	VkFormat														getResultImageFormat				() override;
692 	size_t															getResultImageFormatSize			() override;
693 	VkClearValue													getClearValue						() override;
694 
695 	// Note: there are actually two triangles, but the first one is ignored (see the raygen shader for this configuration).
696 	const std::vector<tcu::Vec3> vertices =
697 	{
698 		tcu::Vec3(0.0f, 0.0f, -0.1f),
699 		tcu::Vec3(-0.1f, 0.0f, 0.0f),
700 		tcu::Vec3(0.0f, -0.1f, 0.0f),
701 		tcu::Vec3(0.0f, 0.0f, 0.0f),
702 		tcu::Vec3(0.5f, 0.0f, -0.5f),
703 		tcu::Vec3(0.0f, 0.5f, -0.5f),
704 	};
705 
706 	const std::vector<deUint32> indices =
707 	{
708 		3,
709 		4,
710 		5
711 	};
712 	// Different vertex configurations of the triangle: for each marked vertex, its x component is set to NaN in the inactive_triangles tests.
713 	const bool nanConfig[7][3] =
714 	{
715 		{ true,		true,		true	},
716 		{ true,		false,		false	},
717 		{ false,	true,		false	},
718 		{ false,	false,		true	},
719 		{ true,		true,		false	},
720 		{ false,	true,		true	},
721 		{ true,		false,		true	},
722 	};
723 };
724 
725 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> > SingleTriangleConfiguration::initBottomAccelerationStructures (Context&			context,
726 																															 TestParams&		testParams)
727 {
728 	DE_UNREF(context);
729 
730 	// No other cases supported for the single triangle configuration.
731 	DE_ASSERT(testParams.instanceCustomIndexCase == InstanceCustomIndexCase::NONE);
732 
733 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >	result;
734 
735 	de::MovePtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure = makeBottomLevelAccelerationStructure();
736 
737 	unsigned int geometryCount = testParams.emptyASCase == EmptyAccelerationStructureCase::INACTIVE_TRIANGLES ? 4U : 1U;
738 
739 	if (testParams.emptyASCase == EmptyAccelerationStructureCase::INACTIVE_TRIANGLES)
740 	{
741 		bottomLevelAccelerationStructure->setGeometryCount(geometryCount);
742 
743 		de::SharedPtr<RaytracedGeometryBase> geometry;
744 		geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_TRIANGLES_KHR, testParams.vertexFormat, testParams.indexType);
745 
746 		for (unsigned int i = 0; i < geometryCount; i++)
747 		{
748 			auto customVertices(vertices);
749 
750 			const auto nanValue = tcu::Float32::nan().asFloat();
751 
752 			if (nanConfig[i][0])
753 				customVertices[3].x() = nanValue;
754 			if (nanConfig[i][1])
755 				customVertices[4].x() = nanValue;
756 			if (nanConfig[i][2])
757 				customVertices[5].x() = nanValue;
758 
759 			for (auto it = begin(customVertices), eit = end(customVertices); it != eit; ++it)
760 				geometry->addVertex(*it);
761 
762 			if (testParams.indexType != VK_INDEX_TYPE_NONE_KHR)
763 			{
764 				for (auto it = begin(indices), eit = end(indices); it != eit; ++it)
765 					geometry->addIndex(*it);
766 			}
767 			bottomLevelAccelerationStructure->addGeometry(geometry);
768 		}
769 	}
770 	else
771 	{
772 		bottomLevelAccelerationStructure->setGeometryCount(geometryCount);
773 
774 		de::SharedPtr<RaytracedGeometryBase> geometry;
775 		geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_TRIANGLES_KHR, testParams.vertexFormat, testParams.indexType);
776 
777 		for (auto it = begin(vertices), eit = end(vertices); it != eit; ++it)
778 			geometry->addVertex(*it);
779 
780 		if (testParams.indexType != VK_INDEX_TYPE_NONE_KHR)
781 		{
782 			for (auto it = begin(indices), eit = end(indices); it != eit; ++it)
783 				geometry->addIndex(*it);
784 		}
785 		bottomLevelAccelerationStructure->addGeometry(geometry);
786 	}
787 
788 	result.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(bottomLevelAccelerationStructure.release()));
789 
790 	return result;
791 }
792 
793 de::MovePtr<TopLevelAccelerationStructure> SingleTriangleConfiguration::initTopAccelerationStructure (Context&			context,
794 																									  TestParams&		testParams,
795 																									  std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >& bottomLevelAccelerationStructures)
796 {
797 	DE_UNREF(context);
798 	DE_UNREF(testParams);
799 
800 	// Unsupported in this configuration.
801 	DE_ASSERT(testParams.instanceCustomIndexCase == InstanceCustomIndexCase::NONE);
802 
803 	de::MovePtr<TopLevelAccelerationStructure>	result = makeTopLevelAccelerationStructure();
804 	result->setInstanceCount(1u);
805 
806 	result->addInstance(bottomLevelAccelerationStructures[0]);
807 
808 	return result;
809 }
810 
811 void SingleTriangleConfiguration::initRayTracingShaders(de::MovePtr<RayTracingPipeline>&		rayTracingPipeline,
812 														Context&								context,
813 														TestParams&								testParams)
814 {
815 	DE_UNREF(testParams);
816 	const DeviceInterface&						vkd						= context.getDeviceInterface();
817 	const VkDevice								device					= context.getDevice();
818 
819 	rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,		createShaderModule(vkd, device, context.getBinaryCollection().get("rgen_depth"),  0), 0);
820 	rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,	createShaderModule(vkd, device, context.getBinaryCollection().get("chit_depth"),  0), 1);
821 	rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR,			createShaderModule(vkd, device, context.getBinaryCollection().get("miss_depth"),  0), 2);
822 }
823 
824 void SingleTriangleConfiguration::initShaderBindingTables(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
825 														  Context&							context,
826 														  TestParams&						testParams,
827 														  VkPipeline						pipeline,
828 														  deUint32							shaderGroupHandleSize,
829 														  deUint32							shaderGroupBaseAlignment,
830 														  de::MovePtr<BufferWithMemory>&	raygenShaderBindingTable,
831 														  de::MovePtr<BufferWithMemory>&	hitShaderBindingTable,
832 														  de::MovePtr<BufferWithMemory>&	missShaderBindingTable)
833 {
834 	DE_UNREF(testParams);
835 	const DeviceInterface&						vkd						= context.getDeviceInterface();
836 	const VkDevice								device					= context.getDevice();
837 	Allocator&									allocator				= context.getDefaultAllocator();
838 
839 	raygenShaderBindingTable											= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1 );
840 	hitShaderBindingTable												= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1 );
841 	missShaderBindingTable												= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 2, 1 );
842 }
843 
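// 2D point-in-triangle test using scaled barycentric coordinates: s and t behave like
// unnormalized barycentric coordinates of p with respect to (p0, p1, p2), and a is twice the
// triangle's signed area; p lies inside when s, t and (a - s - t) all share the sign of a.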
844 bool pointInTriangle2D(const tcu::Vec3& p, const tcu::Vec3& p0, const tcu::Vec3& p1, const tcu::Vec3& p2)
845 {
846 	float s = p0.y() * p2.x() - p0.x() * p2.y() + (p2.y() - p0.y()) * p.x() + (p0.x() - p2.x()) * p.y();
847 	float t = p0.x() * p1.y() - p0.y() * p1.x() + (p0.y() - p1.y()) * p.x() + (p1.x() - p0.x()) * p.y();
848 
849 	if ((s < 0) != (t < 0))
850 		return false;
851 
852 	float a = -p1.y() * p2.x() + p0.y() * (p2.x() - p1.x()) + p0.x() * (p1.y() - p2.y()) + p1.x() * p2.y();
853 
854 	return a < 0 ?
855 		(s <= 0 && s + t >= a) :
856 		(s >= 0 && s + t <= a);
857 }
858 
859 bool SingleTriangleConfiguration::verifyImage(BufferWithMemory* resultBuffer, Context& context, TestParams& testParams)
860 {
861 	tcu::TextureFormat			imageFormat		= vk::mapVkFormat(getResultImageFormat());
862 	tcu::TextureFormat			vertexFormat	= vk::mapVkFormat(testParams.vertexFormat);
863 	tcu::ConstPixelBufferAccess	resultAccess	(imageFormat, testParams.width, testParams.height, 1, resultBuffer->getAllocation().getHostPtr());
864 
865 	std::vector<float>			reference		(testParams.width * testParams.height);
866 	tcu::PixelBufferAccess		referenceAccess	(imageFormat, testParams.width, testParams.height, 1, reference.data());
867 
868 	// verify results
869 	tcu::Vec3					v0				= vertices[3];
870 	tcu::Vec3					v1				= vertices[4];
871 	tcu::Vec3					v2				= vertices[5];
872 	const int					numChannels		= tcu::getNumUsedChannels(vertexFormat.order);
873 	if (numChannels < 3)
874 	{
875 		v0.z() = 0.0f;
876 		v1.z() = 0.0f;
877 		v2.z() = 0.0f;
878 	}
879 	tcu::Vec3					abc				= tcu::cross((v2 - v0), (v1 - v0));
880 
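	// Reference generation: rays are cast from z == 1 along -Z over the [0.1, 0.3] x [0.1, 0.3]
	// region (matching the rgen_depth shader). abc is the unnormalized normal of the reference
	// triangle, which passes through the origin, so z below is the depth of the hit point under
	// the z == 0 plane and the expected hit distance is 1 + z.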
881 	for (deUint32 j = 0; j < testParams.height; ++j)
882 	{
883 		float y = 0.1f + 0.2f * float(j) / float(testParams.height - 1);
884 		for (deUint32 i = 0; i < testParams.width; ++i)
885 		{
886 			float	x			= 0.1f + 0.2f * float(i) / float(testParams.width - 1);
887 			float	z			= (abc.x()*x + abc.y()*y) / abc.z();
888 			bool	inTriangle	= pointInTriangle2D(tcu::Vec3(x, y, z), v0, v1, v2);
889 			float	refValue	= ((inTriangle && testParams.emptyASCase == EmptyAccelerationStructureCase::NOT_EMPTY) ? 1.0f+z : 0.0f);
890 			referenceAccess.setPixel(tcu::Vec4(refValue, 0.0f, 0.0f, 1.0f), i, j);
891 		}
892 	}
893 	return tcu::floatThresholdCompare(context.getTestContext().getLog(), "Result comparison", "", referenceAccess, resultAccess, tcu::Vec4(0.01f), tcu::COMPARE_LOG_EVERYTHING);
894 }
895 
896 VkFormat SingleTriangleConfiguration::getResultImageFormat()
897 {
898 	return VK_FORMAT_R32_SFLOAT;
899 }
900 
901 size_t SingleTriangleConfiguration::getResultImageFormatSize()
902 {
903 	return sizeof(float);
904 }
905 
906 VkClearValue SingleTriangleConfiguration::getClearValue()
907 {
908 	return makeClearValueColorF32(32.0f, 0.0f, 0.0f, 0.0f);
909 }
910 
911 void commonASTestsCheckSupport(Context& context)
912 {
913 	context.requireInstanceFunctionality("VK_KHR_get_physical_device_properties2");
914 	context.requireDeviceFunctionality("VK_KHR_acceleration_structure");
915 	context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
916 
917 	const VkPhysicalDeviceRayTracingPipelineFeaturesKHR&	rayTracingPipelineFeaturesKHR = context.getRayTracingPipelineFeatures();
918 	if (rayTracingPipelineFeaturesKHR.rayTracingPipeline == DE_FALSE)
919 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceRayTracingPipelineFeaturesKHR.rayTracingPipeline");
920 
921 	const VkPhysicalDeviceAccelerationStructureFeaturesKHR&	accelerationStructureFeaturesKHR = context.getAccelerationStructureFeatures();
922 	if (accelerationStructureFeaturesKHR.accelerationStructure == DE_FALSE)
923 		TCU_THROW(TestError, "VK_KHR_ray_tracing_pipeline requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructure");
924 }
925 
926 class RayTracingASBasicTestCase : public TestCase
927 {
928 public:
929 																	RayTracingASBasicTestCase			(tcu::TestContext& context, const char* name, const char* desc, const TestParams& data);
930 																	~RayTracingASBasicTestCase			(void);
931 
932 	void															checkSupport						(Context& context) const override;
933 	void															initPrograms						(SourceCollections& programCollection) const override;
934 	TestInstance*													createInstance						(Context& context) const override;
935 protected:
936 	TestParams														m_data;
937 };
938 
939 // Same as RayTracingASBasicTestCase but it will only initialize programs for SingleTriangleConfiguration and use hand-tuned SPIR-V
940 // assembly.
941 class RayTracingASFuncArgTestCase : public RayTracingASBasicTestCase
942 {
943 public:
944 																	RayTracingASFuncArgTestCase			(tcu::TestContext& context, const char* name, const char* desc, const TestParams& data);
945 																	~RayTracingASFuncArgTestCase		(void) {}
946 
947 	void															initPrograms						(SourceCollections& programCollection) const override;
948 };
949 
950 class RayTracingASBasicTestInstance : public TestInstance
951 {
952 public:
953 																	RayTracingASBasicTestInstance		(Context& context, const TestParams& data);
954 																	~RayTracingASBasicTestInstance		(void) = default;
955 	tcu::TestStatus													iterate								(void) override;
956 
957 protected:
958 	bool															iterateNoWorkers					(void);
959 	bool															iterateWithWorkers					(void);
960 	de::MovePtr<BufferWithMemory>									runTest								(const deUint32 workerThreadsCount);
961 private:
962 	TestParams														m_data;
963 };
964 
965 RayTracingASBasicTestCase::RayTracingASBasicTestCase (tcu::TestContext& context, const char* name, const char* desc, const TestParams& data)
966 	: vkt::TestCase	(context, name, desc)
967 	, m_data		(data)
968 {
969 }
970 
971 RayTracingASBasicTestCase::~RayTracingASBasicTestCase	(void)
972 {
973 }
974 
975 void RayTracingASBasicTestCase::checkSupport(Context& context) const
976 {
977 	commonASTestsCheckSupport(context);
978 
979 	const VkPhysicalDeviceAccelerationStructureFeaturesKHR&	accelerationStructureFeaturesKHR = context.getAccelerationStructureFeatures();
980 	if (m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR && accelerationStructureFeaturesKHR.accelerationStructureHostCommands == DE_FALSE)
981 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructureHostCommands");
982 
983 	if (m_data.useCullMask)
984 		context.requireDeviceFunctionality("VK_KHR_ray_tracing_maintenance1");
985 
986 	// Check supported vertex format.
987 	checkAccelerationStructureVertexBufferFormat(context.getInstanceInterface(), context.getPhysicalDevice(), m_data.vertexFormat);
988 }
989 
990 void RayTracingASBasicTestCase::initPrograms (SourceCollections& programCollection) const
991 {
992 	bool storeInRGen = false;
993 	bool storeInAHit = false;
994 	bool storeInCHit = false;
995 	bool storeInISec = false;
996 
997 	switch (m_data.instanceCustomIndexCase)
998 	{
999 	case InstanceCustomIndexCase::NONE:			storeInRGen = true;	break;
1000 	case InstanceCustomIndexCase::CLOSEST_HIT:	storeInCHit = true; break;
1001 	case InstanceCustomIndexCase::ANY_HIT:		storeInAHit = true;	break;
1002 	case InstanceCustomIndexCase::INTERSECTION:	storeInISec = true; break;
1003 	default: DE_ASSERT(false); break;
1004 	}
1005 
1006 
1007 	const std::string				imageDeclaration	= "layout(r32i, set = 0, binding = 0) uniform iimage2D result;\n";
1008 	const std::string				storeCustomIndex	= "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), ivec4(gl_InstanceCustomIndexEXT, 0, 0, 1));\n";
1009 	const std::string				storeCullMask		= "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), ivec4(gl_CullMaskEXT, 0, 0, 1));\n";
1010 	const vk::ShaderBuildOptions	buildOptions		(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
1011 
1012 	{
1013 		std::stringstream css;
1014 		css
1015 			<< "#version 460 core\n"
1016 			<< "#extension GL_EXT_ray_tracing : require\n"
1017 			<< "layout(location = 0) rayPayloadEXT ivec4 hitValue;\n";
1018 
1019 		if (storeInRGen)
1020 			css << imageDeclaration;
1021 
1022 		css
1023 			<< "layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n"
1024 			<< "\n"
1025 			<< "void main()\n"
1026 			<< "{\n"
1027 			<< "  float tmin      = 0.0;\n"
1028 			<< "  float tmax      = 1.0;\n"
1029 			<< "  vec3  origin    = vec3(float(gl_LaunchIDEXT.x) + 0.5f, float(gl_LaunchIDEXT.y) + 0.5f, 0.5);\n"
1030 			<< "  vec3  direction = vec3(0.0,0.0,-1.0);\n"
1031 			<< "  hitValue        = ivec4(0,0,0,0);\n"
1032 			<< "  traceRayEXT(topLevelAS, " << ((m_data.cullFlags == InstanceCullFlags::NONE) ? "0, " : "gl_RayFlagsCullBackFacingTrianglesEXT, ") << m_data.cullMask << ", 0, 0, 0, origin, tmin, direction, tmax, 0);\n";
1033 
1034 		if (storeInRGen)
1035 			css << "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), hitValue);\n";
1036 
1037 		css << "}\n";
1038 
1039 		programCollection.glslSources.add("rgen") << glu::RaygenSource(updateRayTracingGLSL(css.str())) << buildOptions;
1040 	}
1041 
1042 	{
1043 		std::stringstream css;
1044 		css
1045 			<< "#version 460 core\n"
1046 			<< "#extension GL_EXT_ray_tracing : require\n"
1047 			<< ((m_data.useCullMask) ? "#extension GL_EXT_ray_cull_mask : require\n" : "\n")
1048 			<< "layout(location = 0) rayPayloadInEXT ivec4 hitValue;\n";
1049 
1050 		if (storeInCHit)
1051 			css << imageDeclaration;
1052 
1053 		css
1054 			<< "void main()\n"
1055 			<< "{\n"
1056 			<< "  hitValue = ivec4(2,0,0,1);\n";
1057 
1058 		if (storeInCHit)
1059 		{
1060 			if (m_data.useCullMask)
1061 			{
1062 				css << storeCullMask;
1063 			}
1064 			else
1065 			{
1066 				css << storeCustomIndex;
1067 			}
1068 		}
1069 
1070 		css << "}\n";
1071 
1072 		programCollection.glslSources.add("chit") << glu::ClosestHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
1073 	}
1074 
1075 	if (storeInAHit)
1076 	{
1077 		std::stringstream css;
1078 		css
1079 			<< "#version 460 core\n"
1080 			<< "#extension GL_EXT_ray_tracing : require\n"
1081 			<< ((m_data.useCullMask) ? "#extension GL_EXT_ray_cull_mask : require\n" : "\n")
1082 			<< imageDeclaration
1083 			<< "void main()\n"
1084 			<< "{\n"
1085 			<< ((m_data.useCullMask) ? storeCullMask : storeCustomIndex)
1086 			<< "}\n";
1087 
1088 		programCollection.glslSources.add("ahit") << glu::AnyHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
1089 	}
1090 
1091 	{
1092 		std::stringstream css;
1093 		css
1094 			<< "#version 460 core\n"
1095 			<< "#extension GL_EXT_ray_tracing : require\n"
1096 			<< ((m_data.useCullMask) ? "#extension GL_EXT_ray_cull_mask : require\n" : "\n")
1097 			<< "hitAttributeEXT ivec4 hitAttribute;\n";
1098 
1099 		if (storeInISec)
1100 			css << imageDeclaration;
1101 
1102 		css
1103 			<< "void main()\n"
1104 			<< "{\n"
1105 			<< "  hitAttribute = ivec4(0,0,0,0);\n"
1106 			<< "  reportIntersectionEXT(0.5f, 0);\n";
1107 		if (storeInISec)
1108 		{
1109 			if (m_data.useCullMask)
1110 			{
1111 				css << storeCullMask;
1112 			}
1113 			else
1114 			{
1115 				css << storeCustomIndex;
1116 			}
1117 		}
1118 
1119 		css << "}\n";
1120 
1121 		programCollection.glslSources.add("isect") << glu::IntersectionSource(updateRayTracingGLSL(css.str())) << buildOptions;
1122 	}
1123 
1124 	{
1125 		std::stringstream css;
1126 		css
1127 			<< "#version 460 core\n"
1128 			<< "#extension GL_EXT_ray_tracing : require\n"
1129 			<< ((m_data.useCullMask) ? "#extension GL_EXT_ray_cull_mask : require\n" : "\n")
1130 			<< "layout(location = 0) rayPayloadInEXT ivec4 hitValue;\n";
1131 
1132 		if (!storeInRGen)
1133 			css << imageDeclaration;
1134 
1135 		css
1136 			<< "void main()\n"
1137 			<< "{\n"
1138 			<< "  hitValue = ivec4(1,0,0,1);\n";
1139 		if (!storeInRGen)
1140 		{
1141 			if (m_data.useCullMask)
1142 			{
1143 				css << "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), ivec4(bitfieldReverse(uint(gl_CullMaskEXT)), 0, 0, 1)); \n";
1144 			}
1145 			else
1146 			{
1147 				css << "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), hitValue);\n";
1148 			}
1149 		}
1150 
1151 		css << "}\n";
1152 
1153 		programCollection.glslSources.add("miss") << glu::MissSource(updateRayTracingGLSL(css.str())) << buildOptions;
1154 	}
1155 
1156 	{
1157 		std::stringstream css;
1158 		css <<
1159 			"#version 460 core\n"
1160 			"#extension GL_EXT_ray_tracing : require\n"
1161 			"layout(location = 0) rayPayloadEXT vec4 hitValue;\n"
1162 			"layout(r32f, set = 0, binding = 0) uniform image2D result;\n"
1163 			"layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n"
1164 			"\n"
1165 			"vec3 calculateOrigin(vec3 zeroOrigin, vec3 xAxis, vec3 yAxis)\n"
1166 			"{\n"
1167 			"  return zeroOrigin + (float(gl_LaunchIDEXT.x)/float(gl_LaunchSizeEXT.x-1)) * xAxis + (float(gl_LaunchIDEXT.y)/float(gl_LaunchSizeEXT.y-1)) * yAxis;\n"
1168 			"}\n"
1169 			"\n"
1170 			"void main()\n"
1171 			"{\n"
1172 			"  float tmin      = 0.0;\n"
1173 			"  float tmax      = 2.0;\n"
1174 			"  vec3  origin    = calculateOrigin( vec3(0.1,0.1,1.0), vec3(0.2,0.0,0.0), vec3(0.0,0.2,0.0) );\n"
1175 			"  vec3  direction = vec3(0.0,0.0,-1.0);\n"
1176 			"  hitValue        = vec4(0.0,0.0,0.0,0.0);\n"
1177 			"  traceRayEXT(topLevelAS, 0, 0xFF, 0, 0, 0, origin, tmin, direction, tmax, 0);\n"
1178 			"  imageStore(result, ivec2(gl_LaunchIDEXT.xy), hitValue);\n"
1179 			"}\n";
1180 		programCollection.glslSources.add("rgen_depth") << glu::RaygenSource(updateRayTracingGLSL(css.str())) << buildOptions;
1181 	}
1182 
1183 	{
1184 		std::stringstream css;
1185 		css <<
1186 			"#version 460 core\n"
1187 			"#extension GL_EXT_ray_tracing : require\n"
1188 			"layout(location = 0) rayPayloadInEXT vec4 hitValue;\n"
1189 			"void main()\n"
1190 			"{\n"
1191 			"  hitValue = vec4(gl_RayTmaxEXT,0.0,0.0,1.0);\n"
1192 			"}\n";
1193 
1194 		programCollection.glslSources.add("chit_depth") << glu::ClosestHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
1195 	}
1196 
1197 	{
1198 		std::stringstream css;
1199 		css <<
1200 			"#version 460 core\n"
1201 			"#extension GL_EXT_ray_tracing : require\n"
1202 			"layout(location = 0) rayPayloadInEXT vec4 hitValue;\n"
1203 			"void main()\n"
1204 			"{\n"
1205 			"  hitValue = vec4(0.0,0.0,0.0,1.0);\n"
1206 			"}\n";
1207 
1208 		programCollection.glslSources.add("miss_depth") << glu::MissSource(updateRayTracingGLSL(css.str())) << buildOptions;
1209 	}
1210 }
1211 
1212 TestInstance* RayTracingASBasicTestCase::createInstance (Context& context) const
1213 {
1214 	return new RayTracingASBasicTestInstance(context, m_data);
1215 }
1216 
1217 RayTracingASFuncArgTestCase::RayTracingASFuncArgTestCase (tcu::TestContext& context, const char* name, const char* desc, const TestParams& data)
1218 	: RayTracingASBasicTestCase (context, name, desc, data)
1219 {
1220 }
1221 
1222 void RayTracingASFuncArgTestCase::initPrograms (SourceCollections& programCollection) const
1223 {
1224 	const vk::ShaderBuildOptions	buildOptions	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
1225 	const vk::SpirVAsmBuildOptions	spvBuildOptions	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, true);
1226 
1227 	{
1228 		// The SPIR-V assembly below is based on the following GLSL code. Some
1229 		// modifications have been made to make traceRaysBottomWrapper take a bare
1230 		// acceleration structure as its argument instead of a pointer to it, so we can
1231 		// test passing a pointer and a bare value in the same test.
1232 		//
1233 		//	#version 460 core
1234 		//	#extension GL_EXT_ray_tracing : require
1235 		//	layout(location = 0) rayPayloadEXT vec4 hitValue;
1236 		//	layout(r32f, set = 0, binding = 0) uniform image2D result;
1237 		//	layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;
1238 		//
1239 		//	void traceRaysBottomWrapper(
1240 		//	  accelerationStructureEXT topLevel,
1241 		//	  uint rayFlags,
1242 		//	  uint cullMask,
1243 		//	  uint sbtRecordOffset,
1244 		//	  uint sbtRecordStride,
1245 		//	  uint missIndex,
1246 		//	  vec3 origin,
1247 		//	  float Tmin,
1248 		//	  vec3 direction,
1249 		//	  float Tmax)
1250 		//	{
1251 		//	  traceRayEXT(topLevel, rayFlags, cullMask, sbtRecordOffset, sbtRecordStride, missIndex, origin, Tmin, direction, Tmax, 0);
1252 		//	}
1253 		//
1254 		//	void traceRaysTopWrapper(
1255 		//	  accelerationStructureEXT topLevel,
1256 		//	  uint rayFlags,
1257 		//	  uint cullMask,
1258 		//	  uint sbtRecordOffset,
1259 		//	  uint sbtRecordStride,
1260 		//	  uint missIndex,
1261 		//	  vec3 origin,
1262 		//	  float Tmin,
1263 		//	  vec3 direction,
1264 		//	  float Tmax)
1265 		//	{
1266 		//	  traceRaysBottomWrapper(topLevel, rayFlags, cullMask, sbtRecordOffset, sbtRecordStride, missIndex, origin, Tmin, direction, Tmax);
1267 		//	}
1268 		//
1269 		//	vec3 calculateOrigin(vec3 zeroOrigin, vec3 xAxis, vec3 yAxis)
1270 		//	{
1271 		//	  return zeroOrigin + (float(gl_LaunchIDEXT.x)/float(gl_LaunchSizeEXT.x-1)) * xAxis + (float(gl_LaunchIDEXT.y)/float(gl_LaunchSizeEXT.y-1)) * yAxis;
1272 		//	}
1273 		//
1274 		//	void main()
1275 		//	{
1276 		//	  float tmin      = 0.0;
1277 		//	  float tmax      = 2.0;
1278 		//	  vec3  origin    = calculateOrigin( vec3(0.1,0.1,1.0), vec3(0.2,0.0,0.0), vec3(0.0,0.2,0.0) );
1279 		//	  vec3  direction = vec3(0.0,0.0,-1.0);
1280 		//	  hitValue        = vec4(0.0,0.0,0.0,0.0);
1281 		//	  traceRaysTopWrapper(topLevelAS, 0, 0xFF, 0, 0, 0, origin, tmin, direction, tmax);
1282 		//	  imageStore(result, ivec2(gl_LaunchIDEXT.xy), hitValue);
1283 		//	}
1284 
1285 		std::ostringstream rgen;
1286 		rgen
1287 			<< "; SPIR-V\n"
1288 			<< "; Version: 1.4\n"
1289 			<< "; Generator: Khronos Glslang Reference Front End; 10\n"
1290 			<< "; Bound: 156\n"
1291 			<< "; Schema: 0\n"
1292 			<< "OpCapability RayTracingKHR\n"
1293 			<< "OpExtension \"SPV_KHR_ray_tracing\"\n"
1294 			<< "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1295 			<< "OpMemoryModel Logical GLSL450\n"
1296 			<< "OpEntryPoint RayGenerationKHR %4 \"main\" %59 %82 %88 %130 %148\n"
1297 			<< "OpDecorate %59 Location 0\n"
1298 			<< "OpDecorate %82 BuiltIn LaunchIdKHR\n"
1299 			<< "OpDecorate %88 BuiltIn LaunchSizeKHR\n"
1300 			<< "OpDecorate %130 DescriptorSet 0\n"
1301 			<< "OpDecorate %130 Binding 1\n"
1302 			<< "OpDecorate %148 DescriptorSet 0\n"
1303 			<< "OpDecorate %148 Binding 0\n"
1304 			<< "%2 = OpTypeVoid\n"
1305 			<< "%3 = OpTypeFunction %2\n"
1306 
1307 			// This is the bare type.
1308 			<< "%6 = OpTypeAccelerationStructureKHR\n"
1309 
1310 			// This is the pointer type.
1311 			<< "%7 = OpTypePointer UniformConstant %6\n"
1312 
1313 			<< "%8 = OpTypeInt 32 0\n"
1314 			<< "%9 = OpTypePointer Function %8\n"
1315 			<< "%10 = OpTypeFloat 32\n"
1316 			<< "%11 = OpTypeVector %10 3\n"
1317 			<< "%12 = OpTypePointer Function %11\n"
1318 			<< "%13 = OpTypePointer Function %10\n"
1319 
1320 			// This is the type for traceRaysTopWrapper and also the original traceRaysBottomWrapper.
1321 			<< "%14 = OpTypeFunction %2 %7 %9 %9 %9 %9 %9 %12 %13 %12 %13\n"
1322 
1323 			// This is the modified type to take a bare AS as the first argument, for the modified version of traceRaysBottomWrapper.
1324 			<< "%14b = OpTypeFunction %2 %6 %9 %9 %9 %9 %9 %12 %13 %12 %13\n"
1325 
1326 			<< "%39 = OpTypeFunction %11 %12 %12 %12\n"
1327 			<< "%55 = OpTypeInt 32 1\n"
1328 			<< "%56 = OpConstant %55 0\n"
1329 			<< "%57 = OpTypeVector %10 4\n"
1330 			<< "%58 = OpTypePointer RayPayloadKHR %57\n"
1331 			<< "%59 = OpVariable %58 RayPayloadKHR\n"
1332 			<< "%80 = OpTypeVector %8 3\n"
1333 			<< "%81 = OpTypePointer Input %80\n"
1334 			<< "%82 = OpVariable %81 Input\n"
1335 			<< "%83 = OpConstant %8 0\n"
1336 			<< "%84 = OpTypePointer Input %8\n"
1337 			<< "%88 = OpVariable %81 Input\n"
1338 			<< "%91 = OpConstant %8 1\n"
1339 			<< "%112 = OpConstant %10 0\n"
1340 			<< "%114 = OpConstant %10 2\n"
1341 			<< "%116 = OpConstant %10 0.100000001\n"
1342 			<< "%117 = OpConstant %10 1\n"
1343 			<< "%118 = OpConstantComposite %11 %116 %116 %117\n"
1344 			<< "%119 = OpConstant %10 0.200000003\n"
1345 			<< "%120 = OpConstantComposite %11 %119 %112 %112\n"
1346 			<< "%121 = OpConstantComposite %11 %112 %119 %112\n"
1347 			<< "%127 = OpConstant %10 -1\n"
1348 			<< "%128 = OpConstantComposite %11 %112 %112 %127\n"
1349 			<< "%129 = OpConstantComposite %57 %112 %112 %112 %112\n"
1350 			<< "%130 = OpVariable %7 UniformConstant\n"
1351 			<< "%131 = OpConstant %8 255\n"
1352 			<< "%146 = OpTypeImage %10 2D 0 0 0 2 R32f\n"
1353 			<< "%147 = OpTypePointer UniformConstant %146\n"
1354 			<< "%148 = OpVariable %147 UniformConstant\n"
1355 			<< "%150 = OpTypeVector %8 2\n"
1356 			<< "%153 = OpTypeVector %55 2\n"
1357 
1358 			// This is main().
1359 			<< "%4 = OpFunction %2 None %3\n"
1360 			<< "%5 = OpLabel\n"
1361 			<< "%111 = OpVariable %13 Function\n"
1362 			<< "%113 = OpVariable %13 Function\n"
1363 			<< "%115 = OpVariable %12 Function\n"
1364 			<< "%122 = OpVariable %12 Function\n"
1365 			<< "%123 = OpVariable %12 Function\n"
1366 			<< "%124 = OpVariable %12 Function\n"
1367 			<< "%126 = OpVariable %12 Function\n"
1368 			<< "%132 = OpVariable %9 Function\n"
1369 			<< "%133 = OpVariable %9 Function\n"
1370 			<< "%134 = OpVariable %9 Function\n"
1371 			<< "%135 = OpVariable %9 Function\n"
1372 			<< "%136 = OpVariable %9 Function\n"
1373 			<< "%137 = OpVariable %12 Function\n"
1374 			<< "%139 = OpVariable %13 Function\n"
1375 			<< "%141 = OpVariable %12 Function\n"
1376 			<< "%143 = OpVariable %13 Function\n"
1377 			<< "OpStore %111 %112\n"
1378 			<< "OpStore %113 %114\n"
1379 			<< "OpStore %122 %118\n"
1380 			<< "OpStore %123 %120\n"
1381 			<< "OpStore %124 %121\n"
1382 			<< "%125 = OpFunctionCall %11 %43 %122 %123 %124\n"
1383 			<< "OpStore %115 %125\n"
1384 			<< "OpStore %126 %128\n"
1385 			<< "OpStore %59 %129\n"
1386 			<< "OpStore %132 %83\n"
1387 			<< "OpStore %133 %131\n"
1388 			<< "OpStore %134 %83\n"
1389 			<< "OpStore %135 %83\n"
1390 			<< "OpStore %136 %83\n"
1391 			<< "%138 = OpLoad %11 %115\n"
1392 			<< "OpStore %137 %138\n"
1393 			<< "%140 = OpLoad %10 %111\n"
1394 			<< "OpStore %139 %140\n"
1395 			<< "%142 = OpLoad %11 %126\n"
1396 			<< "OpStore %141 %142\n"
1397 			<< "%144 = OpLoad %10 %113\n"
1398 			<< "OpStore %143 %144\n"
1399 			<< "%145 = OpFunctionCall %2 %37 %130 %132 %133 %134 %135 %136 %137 %139 %141 %143\n"
1400 			<< "%149 = OpLoad %146 %148\n"
1401 			<< "%151 = OpLoad %80 %82\n"
1402 			<< "%152 = OpVectorShuffle %150 %151 %151 0 1\n"
1403 			<< "%154 = OpBitcast %153 %152\n"
1404 			<< "%155 = OpLoad %57 %59\n"
1405 			<< "OpImageWrite %149 %154 %155\n"
1406 			<< "OpReturn\n"
1407 			<< "OpFunctionEnd\n"
1408 
1409 			// This is traceRaysBottomWrapper, doing the OpTraceRayKHR call.
1410 			// We have modified the type so it takes a bare AS as the first argument.
1411 			// %25 = OpFunction %2 None %14
1412 			<< "%25 = OpFunction %2 None %14b\n"
1413 
1414 			// Also the type of the first argument here.
1415 			// %15 = OpFunctionParameter %7
1416 			<< "%15 = OpFunctionParameter %6\n"
1417 
1418 			<< "%16 = OpFunctionParameter %9\n"
1419 			<< "%17 = OpFunctionParameter %9\n"
1420 			<< "%18 = OpFunctionParameter %9\n"
1421 			<< "%19 = OpFunctionParameter %9\n"
1422 			<< "%20 = OpFunctionParameter %9\n"
1423 			<< "%21 = OpFunctionParameter %12\n"
1424 			<< "%22 = OpFunctionParameter %13\n"
1425 			<< "%23 = OpFunctionParameter %12\n"
1426 			<< "%24 = OpFunctionParameter %13\n"
1427 			<< "%26 = OpLabel\n"
1428 
1429 			// We no longer need to dereference the pointer here.
1430 			// %45 = OpLoad %6 %15
1431 
1432 			<< "%46 = OpLoad %8 %16\n"
1433 			<< "%47 = OpLoad %8 %17\n"
1434 			<< "%48 = OpLoad %8 %18\n"
1435 			<< "%49 = OpLoad %8 %19\n"
1436 			<< "%50 = OpLoad %8 %20\n"
1437 			<< "%51 = OpLoad %11 %21\n"
1438 			<< "%52 = OpLoad %10 %22\n"
1439 			<< "%53 = OpLoad %11 %23\n"
1440 			<< "%54 = OpLoad %10 %24\n"
1441 
1442 			// And we can use the first argument here directly.
1443 			// OpTraceRayKHR %45 %46 %47 %48 %49 %50 %51 %52 %53 %54 %59
1444 			<< "OpTraceRayKHR %15 %46 %47 %48 %49 %50 %51 %52 %53 %54 %59\n"
1445 
1446 			<< "OpReturn\n"
1447 			<< "OpFunctionEnd\n"
1448 
1449 			// This is traceRaysTopWrapper, which calls traceRaysBottomWrapper.
1450 			<< "%37 = OpFunction %2 None %14\n"
1451 
1452 			// First argument, pointer to AS.
1453 			<< "%27 = OpFunctionParameter %7\n"
1454 
1455 			<< "%28 = OpFunctionParameter %9\n"
1456 			<< "%29 = OpFunctionParameter %9\n"
1457 			<< "%30 = OpFunctionParameter %9\n"
1458 			<< "%31 = OpFunctionParameter %9\n"
1459 			<< "%32 = OpFunctionParameter %9\n"
1460 			<< "%33 = OpFunctionParameter %12\n"
1461 			<< "%34 = OpFunctionParameter %13\n"
1462 			<< "%35 = OpFunctionParameter %12\n"
1463 			<< "%36 = OpFunctionParameter %13\n"
1464 			<< "%38 = OpLabel\n"
1465 			<< "%60 = OpVariable %9 Function\n"
1466 			<< "%62 = OpVariable %9 Function\n"
1467 			<< "%64 = OpVariable %9 Function\n"
1468 			<< "%66 = OpVariable %9 Function\n"
1469 			<< "%68 = OpVariable %9 Function\n"
1470 			<< "%70 = OpVariable %12 Function\n"
1471 			<< "%72 = OpVariable %13 Function\n"
1472 			<< "%74 = OpVariable %12 Function\n"
1473 			<< "%76 = OpVariable %13 Function\n"
1474 
1475 			// Dereference the pointer to pass the AS as the first argument.
1476 			<< "%27b = OpLoad %6 %27\n"
1477 
1478 			<< "%61 = OpLoad %8 %28\n"
1479 			<< "OpStore %60 %61\n"
1480 			<< "%63 = OpLoad %8 %29\n"
1481 			<< "OpStore %62 %63\n"
1482 			<< "%65 = OpLoad %8 %30\n"
1483 			<< "OpStore %64 %65\n"
1484 			<< "%67 = OpLoad %8 %31\n"
1485 			<< "OpStore %66 %67\n"
1486 			<< "%69 = OpLoad %8 %32\n"
1487 			<< "OpStore %68 %69\n"
1488 			<< "%71 = OpLoad %11 %33\n"
1489 			<< "OpStore %70 %71\n"
1490 			<< "%73 = OpLoad %10 %34\n"
1491 			<< "OpStore %72 %73\n"
1492 			<< "%75 = OpLoad %11 %35\n"
1493 			<< "OpStore %74 %75\n"
1494 			<< "%77 = OpLoad %10 %36\n"
1495 			<< "OpStore %76 %77\n"
1496 
1497 			// %2 is void, %25 is traceRaysBottomWrapper and %27 was the first argument.
1498 			// We need to pass the loaded AS instead.
1499 			// %78 = OpFunctionCall %2 %25 %27 %60 %62 %64 %66 %68 %70 %72 %74 %76
1500 			<< "%78 = OpFunctionCall %2 %25 %27b %60 %62 %64 %66 %68 %70 %72 %74 %76\n"
1501 
1502 			<< "OpReturn\n"
1503 			<< "OpFunctionEnd\n"
1504 
1505 			// This is calculateOrigin().
1506 			<< "%43 = OpFunction %11 None %39\n"
1507 			<< "%40 = OpFunctionParameter %12\n"
1508 			<< "%41 = OpFunctionParameter %12\n"
1509 			<< "%42 = OpFunctionParameter %12\n"
1510 			<< "%44 = OpLabel\n"
1511 			<< "%79 = OpLoad %11 %40\n"
1512 			<< "%85 = OpAccessChain %84 %82 %83\n"
1513 			<< "%86 = OpLoad %8 %85\n"
1514 			<< "%87 = OpConvertUToF %10 %86\n"
1515 			<< "%89 = OpAccessChain %84 %88 %83\n"
1516 			<< "%90 = OpLoad %8 %89\n"
1517 			<< "%92 = OpISub %8 %90 %91\n"
1518 			<< "%93 = OpConvertUToF %10 %92\n"
1519 			<< "%94 = OpFDiv %10 %87 %93\n"
1520 			<< "%95 = OpLoad %11 %41\n"
1521 			<< "%96 = OpVectorTimesScalar %11 %95 %94\n"
1522 			<< "%97 = OpFAdd %11 %79 %96\n"
1523 			<< "%98 = OpAccessChain %84 %82 %91\n"
1524 			<< "%99 = OpLoad %8 %98\n"
1525 			<< "%100 = OpConvertUToF %10 %99\n"
1526 			<< "%101 = OpAccessChain %84 %88 %91\n"
1527 			<< "%102 = OpLoad %8 %101\n"
1528 			<< "%103 = OpISub %8 %102 %91\n"
1529 			<< "%104 = OpConvertUToF %10 %103\n"
1530 			<< "%105 = OpFDiv %10 %100 %104\n"
1531 			<< "%106 = OpLoad %11 %42\n"
1532 			<< "%107 = OpVectorTimesScalar %11 %106 %105\n"
1533 			<< "%108 = OpFAdd %11 %97 %107\n"
1534 			<< "OpReturnValue %108\n"
1535 			<< "OpFunctionEnd\n"
1536 			;
1537 
1538 		programCollection.spirvAsmSources.add("rgen_depth") << spvBuildOptions << rgen.str();
1539 	}
1540 
1541 	// chit_depth and miss_depth below have been left untouched.
1542 
1543 	{
1544 		std::stringstream css;
1545 		css <<
1546 			"#version 460 core\n"
1547 			"#extension GL_EXT_ray_tracing : require\n"
1548 			"layout(location = 0) rayPayloadInEXT vec4 hitValue;\n"
1549 			"void main()\n"
1550 			"{\n"
1551 			"  hitValue = vec4(gl_RayTmaxEXT,0.0,0.0,1.0);\n"
1552 			"}\n";
1553 
1554 		programCollection.glslSources.add("chit_depth") << glu::ClosestHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
1555 	}
1556 
1557 	{
1558 		std::stringstream css;
1559 		css <<
1560 			"#version 460 core\n"
1561 			"#extension GL_EXT_ray_tracing : require\n"
1562 			"layout(location = 0) rayPayloadInEXT vec4 hitValue;\n"
1563 			"void main()\n"
1564 			"{\n"
1565 			"  hitValue = vec4(0.0,0.0,0.0,1.0);\n"
1566 			"}\n";
1567 
1568 		programCollection.glslSources.add("miss_depth") << glu::MissSource(updateRayTracingGLSL(css.str())) << buildOptions;
1569 	}
1570 }
1571 
1572 RayTracingASBasicTestInstance::RayTracingASBasicTestInstance (Context& context, const TestParams& data)
1573 	: vkt::TestInstance		(context)
1574 	, m_data				(data)
1575 {
1576 }
1577 
1578 de::MovePtr<BufferWithMemory> RayTracingASBasicTestInstance::runTest(const deUint32 workerThreadsCount)
1579 {
1580 	const InstanceInterface&			vki									= m_context.getInstanceInterface();
1581 	const DeviceInterface&				vkd									= m_context.getDeviceInterface();
1582 	const VkDevice						device								= m_context.getDevice();
1583 	const VkPhysicalDevice				physicalDevice						= m_context.getPhysicalDevice();
1584 	const deUint32						queueFamilyIndex					= m_context.getUniversalQueueFamilyIndex();
1585 	const VkQueue						queue								= m_context.getUniversalQueue();
1586 	Allocator&							allocator							= m_context.getDefaultAllocator();
1587 	const deUint32						pixelCount							= m_data.width * m_data.height;
1588 	const deUint32						shaderGroupHandleSize				= getShaderGroupSize(vki, physicalDevice);
1589 	const deUint32						shaderGroupBaseAlignment			= getShaderGroupBaseAlignment(vki, physicalDevice);
1590 	const bool							htCopy								= (workerThreadsCount != 0) && (m_data.operationType == OP_COPY);
1591 	const bool							htSerialize							= (workerThreadsCount != 0) && (m_data.operationType == OP_SERIALIZE);
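	// Host-threaded deferred operations are only exercised for copy and serialize operations, and only
	// when the test was configured with a non-zero worker thread count.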
1592 
1593 	const Move<VkDescriptorSetLayout>	descriptorSetLayout					= DescriptorSetLayoutBuilder()
1594 																					.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
1595 																					.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
1596 																					.build(vkd, device);
1597 	const Move<VkDescriptorPool>		descriptorPool						= DescriptorPoolBuilder()
1598 																					.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
1599 																					.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
1600 																					.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1601 	const Move<VkDescriptorSet>			descriptorSet						= makeDescriptorSet(vkd, device, *descriptorPool, *descriptorSetLayout);
1602 	const Move<VkPipelineLayout>		pipelineLayout						= makePipelineLayout(vkd, device, descriptorSetLayout.get());
1603 
1604 	de::MovePtr<RayTracingPipeline>		rayTracingPipeline					= de::newMovePtr<RayTracingPipeline>();
1605 	m_data.testConfiguration->initRayTracingShaders(rayTracingPipeline, m_context, m_data);
1606 	Move<VkPipeline>					pipeline							= rayTracingPipeline->createPipeline(vkd, device, *pipelineLayout);
1607 
1608 	de::MovePtr<BufferWithMemory>		raygenShaderBindingTable;
1609 	de::MovePtr<BufferWithMemory>		hitShaderBindingTable;
1610 	de::MovePtr<BufferWithMemory>		missShaderBindingTable;
1611 	m_data.testConfiguration->initShaderBindingTables(rayTracingPipeline, m_context, m_data, *pipeline, shaderGroupHandleSize, shaderGroupBaseAlignment, raygenShaderBindingTable, hitShaderBindingTable, missShaderBindingTable);
1612 
1613 	const VkStridedDeviceAddressRegionKHR	raygenShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, raygenShaderBindingTable->get(),	0),	shaderGroupHandleSize,	shaderGroupHandleSize);
1614 	const VkStridedDeviceAddressRegionKHR	missShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(),		0),	shaderGroupHandleSize,	shaderGroupHandleSize);
1615 	const VkStridedDeviceAddressRegionKHR	hitShaderBindingTableRegion			= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitShaderBindingTable->get(),		0),	shaderGroupHandleSize,	shaderGroupHandleSize);
1616 	const VkStridedDeviceAddressRegionKHR	callableShaderBindingTableRegion	= makeStridedDeviceAddressRegionKHR(DE_NULL,																	0,						0);
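	// Every shader binding table used here contains a single shader group handle, so both stride and size
	// equal shaderGroupHandleSize; the callable region stays null because no callable shaders are used.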
1617 
1618 	const VkFormat						imageFormat							= m_data.testConfiguration->getResultImageFormat();
1619 	const VkImageCreateInfo				imageCreateInfo						= makeImageCreateInfo(m_data.width, m_data.height, imageFormat);
1620 	const VkImageSubresourceRange		imageSubresourceRange				= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
1621 	const de::MovePtr<ImageWithMemory>	image								= de::MovePtr<ImageWithMemory>(new ImageWithMemory(vkd, device, allocator, imageCreateInfo, MemoryRequirement::Any));
1622 	const Move<VkImageView>				imageView							= makeImageView(vkd, device, **image, VK_IMAGE_VIEW_TYPE_2D, imageFormat, imageSubresourceRange);
1623 
1624 	const VkBufferCreateInfo			resultBufferCreateInfo				= makeBufferCreateInfo(pixelCount*m_data.testConfiguration->getResultImageFormatSize(), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
1625 	const VkImageSubresourceLayers		resultBufferImageSubresourceLayers	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
1626 	const VkBufferImageCopy				resultBufferImageRegion				= makeBufferImageCopy(makeExtent3D(m_data.width, m_data.height, 1u), resultBufferImageSubresourceLayers);
1627 	de::MovePtr<BufferWithMemory>		resultBuffer						= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible));
1628 
1629 	const VkDescriptorImageInfo			descriptorImageInfo					= makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
1630 
1631 	const Move<VkCommandPool>			cmdPool								= createCommandPool(vkd, device, 0, queueFamilyIndex);
1632 	const Move<VkCommandBuffer>			cmdBuffer							= allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1633 
1634 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	bottomLevelAccelerationStructures;
1635 	de::MovePtr<TopLevelAccelerationStructure>						topLevelAccelerationStructure;
1636 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	bottomLevelAccelerationStructureCopies;
1637 	de::MovePtr<TopLevelAccelerationStructure>						topLevelAccelerationStructureCopy;
1638 	std::vector<de::SharedPtr<SerialStorage>>						bottomSerialized;
1639 	std::vector<de::SharedPtr<SerialStorage>>						topSerialized;
1640 	std::vector<VkDeviceSize>			accelerationCompactedSizes;
1641 	std::vector<VkDeviceSize>			accelerationSerialSizes;
1642 	Move<VkQueryPool>					m_queryPoolCompact;
1643 	Move<VkQueryPool>					m_queryPoolSerial;
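	// Note: the query pools below are only created for device builds; for host builds the size query
	// helper is expected to read the compacted / serialization sizes directly on the host.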
1644 
1645 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
1646 	{
1647 		const VkImageMemoryBarrier				preImageBarrier = makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT,
1648 			VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1649 			**image, imageSubresourceRange);
1650 		cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &preImageBarrier);
1651 		const VkClearValue						clearValue = m_data.testConfiguration->getClearValue();
1652 		vkd.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1, &imageSubresourceRange);
1653 		const VkImageMemoryBarrier				postImageBarrier = makeImageMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
1654 			VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL,
1655 			**image, imageSubresourceRange);
1656 		cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, &postImageBarrier);
1657 
1658 		// build bottom level acceleration structures and their copies ( only when we are testing copying bottom level acceleration structures )
1659 		bool									bottomCompact		= m_data.operationType == OP_COMPACT && m_data.operationTarget == OT_BOTTOM_ACCELERATION;
1660 		bool									bottomSerial		= m_data.operationType == OP_SERIALIZE && m_data.operationTarget == OT_BOTTOM_ACCELERATION;
1661 		const bool								buildWithoutGeom	= (m_data.emptyASCase == EmptyAccelerationStructureCase::NO_GEOMETRIES_BOTTOM);
1662 		const bool								bottomNoPrimitives	= (m_data.emptyASCase == EmptyAccelerationStructureCase::NO_PRIMITIVES_BOTTOM);
1663 		const bool								topNoPrimitives		= (m_data.emptyASCase == EmptyAccelerationStructureCase::NO_PRIMITIVES_TOP);
1664 		const bool								inactiveInstances	= (m_data.emptyASCase == EmptyAccelerationStructureCase::INACTIVE_INSTANCES);
1665 		bottomLevelAccelerationStructures							= m_data.testConfiguration->initBottomAccelerationStructures(m_context, m_data);
1666 		VkBuildAccelerationStructureFlagsKHR	allowCompactionFlag	= VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR;
1667 		VkBuildAccelerationStructureFlagsKHR	emptyCompactionFlag	= VkBuildAccelerationStructureFlagsKHR(0);
1668 		VkBuildAccelerationStructureFlagsKHR	bottomCompactFlags	= (bottomCompact ? allowCompactionFlag : emptyCompactionFlag);
1669 		VkBuildAccelerationStructureFlagsKHR	bottomBuildFlags	= m_data.buildFlags | bottomCompactFlags;
1670 		std::vector<VkAccelerationStructureKHR>	accelerationStructureHandles;
1671 		std::vector<VkDeviceSize>				bottomBlasCompactSize;
1672 		std::vector<VkDeviceSize>				bottomBlasSerialSize;
1673 
1674 		for (auto& blas : bottomLevelAccelerationStructures)
1675 		{
1676 			blas->setBuildType						(m_data.buildType);
1677 			blas->setBuildFlags						(bottomBuildFlags);
1678 			blas->setUseArrayOfPointers				(m_data.bottomUsesAOP);
1679 			blas->setCreateGeneric					(m_data.bottomGeneric);
1680 			blas->setBuildWithoutGeometries			(buildWithoutGeom);
1681 			blas->setBuildWithoutPrimitives			(bottomNoPrimitives);
1682 			blas->createAndBuild					(vkd, device, *cmdBuffer, allocator);
1683 			accelerationStructureHandles.push_back	(*(blas->getPtr()));
1684 		}
1685 
1686 		if (m_data.operationType == OP_COMPACT)
1687 		{
1688 			deUint32 queryCount	= (m_data.operationTarget == OT_BOTTOM_ACCELERATION) ? deUint32(bottomLevelAccelerationStructures.size()) : 1u;
1689 			if (m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1690 				m_queryPoolCompact = makeQueryPool(vkd, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, queryCount);
1691 			if (m_data.operationTarget == OT_BOTTOM_ACCELERATION)
1692 				queryAccelerationStructureSize(vkd, device, *cmdBuffer, accelerationStructureHandles, m_data.buildType, m_queryPoolCompact.get(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, 0u, bottomBlasCompactSize);
1693 		}
1694 		if (m_data.operationType == OP_SERIALIZE)
1695 		{
1696 			deUint32 queryCount	= (m_data.operationTarget == OT_BOTTOM_ACCELERATION) ? deUint32(bottomLevelAccelerationStructures.size()) : 1u;
1697 			if (m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1698 				m_queryPoolSerial = makeQueryPool(vkd, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, queryCount);
1699 			if (m_data.operationTarget == OT_BOTTOM_ACCELERATION)
1700 				queryAccelerationStructureSize(vkd, device, *cmdBuffer, accelerationStructureHandles, m_data.buildType, m_queryPoolSerial.get(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, bottomBlasSerialSize);
1701 		}
1702 
1703 		// If the AS is built on the GPU and we are planning to make a compact copy of it or to serialize / deserialize it, we have to download the query results to the CPU.
1704 		if ((m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) && (bottomCompact || bottomSerial))
1705 		{
1706 			endCommandBuffer(vkd, *cmdBuffer);
1707 
1708 			submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
1709 
1710 			if (bottomCompact)
1711 				VK_CHECK(vkd.getQueryPoolResults(device, *m_queryPoolCompact, 0u, deUint32(bottomBlasCompactSize.size()), sizeof(VkDeviceSize) * bottomBlasCompactSize.size(), bottomBlasCompactSize.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
1712 			if (bottomSerial)
1713 				VK_CHECK(vkd.getQueryPoolResults(device, *m_queryPoolSerial, 0u, deUint32(bottomBlasSerialSize.size()), sizeof(VkDeviceSize) * bottomBlasSerialSize.size(), bottomBlasSerialSize.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
1714 
1715 			vkd.resetCommandPool(device, *cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
1716 			beginCommandBuffer(vkd, *cmdBuffer, 0u);
1717 		}
1718 
1719 		auto bottomLevelAccelerationStructuresPtr								= &bottomLevelAccelerationStructures;
1720 		if (m_data.operationType != OP_NONE && m_data.operationTarget == OT_BOTTOM_ACCELERATION)
1721 		{
1722 			switch (m_data.operationType)
1723 			{
1724 			case OP_COPY:
1725 			{
1726 				for (size_t i = 0; i < bottomLevelAccelerationStructures.size(); ++i)
1727 				{
1728 					de::MovePtr<BottomLevelAccelerationStructure> asCopy = makeBottomLevelAccelerationStructure();
1729 					asCopy->setDeferredOperation(htCopy, workerThreadsCount);
1730 					asCopy->setBuildType(m_data.buildType);
1731 					asCopy->setBuildFlags(m_data.buildFlags);
1732 					asCopy->setUseArrayOfPointers(m_data.bottomUsesAOP);
1733 					asCopy->setCreateGeneric(m_data.bottomGeneric);
1734 					asCopy->setBuildWithoutGeometries(buildWithoutGeom);
1735 					asCopy->setBuildWithoutPrimitives(bottomNoPrimitives);
1736 					asCopy->createAndCopyFrom(vkd, device, *cmdBuffer, allocator, bottomLevelAccelerationStructures[i].get(), 0u, 0u);
1737 					bottomLevelAccelerationStructureCopies.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(asCopy.release()));
1738 				}
1739 				break;
1740 			}
1741 			case OP_COMPACT:
1742 			{
1743 				for (size_t i = 0; i < bottomLevelAccelerationStructures.size(); ++i)
1744 				{
1745 					de::MovePtr<BottomLevelAccelerationStructure> asCopy = makeBottomLevelAccelerationStructure();
1746 					asCopy->setBuildType(m_data.buildType);
1747 					asCopy->setBuildFlags(m_data.buildFlags);
1748 					asCopy->setUseArrayOfPointers(m_data.bottomUsesAOP);
1749 					asCopy->setCreateGeneric(m_data.bottomGeneric);
1750 					asCopy->setBuildWithoutGeometries(buildWithoutGeom);
1751 					asCopy->setBuildWithoutPrimitives(bottomNoPrimitives);
1752 					asCopy->createAndCopyFrom(vkd, device, *cmdBuffer, allocator, bottomLevelAccelerationStructures[i].get(), bottomBlasCompactSize[i], 0u);
1753 					bottomLevelAccelerationStructureCopies.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(asCopy.release()));
1754 				}
1755 				break;
1756 			}
1757 			case OP_SERIALIZE:
1758 			{
1759 				//bottomLevelAccelerationStructureCopies = m_data.testConfiguration->initBottomAccelerationStructures(m_context, m_data);
1760 				for (size_t i = 0; i < bottomLevelAccelerationStructures.size(); ++i)
1761 				{
1762 					de::SharedPtr<SerialStorage> storage ( new SerialStorage(vkd, device, allocator, m_data.buildType, bottomBlasSerialSize[i]));
1763 
1764 					bottomLevelAccelerationStructures[i]->setDeferredOperation(htSerialize, workerThreadsCount);
1765 					bottomLevelAccelerationStructures[i]->serialize(vkd, device, *cmdBuffer, storage.get());
1766 					bottomSerialized.push_back(storage);
1767 
1768 					if (m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1769 					{
1770 						endCommandBuffer(vkd, *cmdBuffer);
1771 
1772 						submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
1773 
1774 						vkd.resetCommandPool(device, *cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
1775 						beginCommandBuffer(vkd, *cmdBuffer, 0u);
1776 					}
1777 
1778 					de::MovePtr<BottomLevelAccelerationStructure> asCopy = makeBottomLevelAccelerationStructure();
1779 					asCopy->setBuildType(m_data.buildType);
1780 					asCopy->setBuildFlags(m_data.buildFlags);
1781 					asCopy->setUseArrayOfPointers(m_data.bottomUsesAOP);
1782 					asCopy->setCreateGeneric(m_data.bottomGeneric);
1783 					asCopy->setBuildWithoutGeometries(buildWithoutGeom);
1784 					asCopy->setBuildWithoutPrimitives(bottomNoPrimitives);
1785 					asCopy->setDeferredOperation(htSerialize, workerThreadsCount);
1786 					asCopy->createAndDeserializeFrom(vkd, device, *cmdBuffer, allocator, storage.get(), 0u);
1787 					bottomLevelAccelerationStructureCopies.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(asCopy.release()));
1788 				}
1789 				break;
1790 			}
1791 			default:
1792 				DE_ASSERT(DE_FALSE);
1793 			}
1794 			bottomLevelAccelerationStructuresPtr = &bottomLevelAccelerationStructureCopies;
1795 		}
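		// From this point on the top level AS is built over the copies, so the copied / compacted /
		// deserialized bottom level structures are what actually gets traced against.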
1796 
1797 		// build top level acceleration structures and their copies ( only when we are testing copying top level acceleration structures )
1798 		bool									topCompact			= m_data.operationType == OP_COMPACT && m_data.operationTarget == OT_TOP_ACCELERATION;
1799 		bool									topSerial			= m_data.operationType == OP_SERIALIZE && m_data.operationTarget == OT_TOP_ACCELERATION;
1800 		VkBuildAccelerationStructureFlagsKHR	topCompactFlags		= (topCompact ? allowCompactionFlag : emptyCompactionFlag);
1801 		VkBuildAccelerationStructureFlagsKHR	topBuildFlags		= m_data.buildFlags | topCompactFlags;
1802 		std::vector<VkAccelerationStructureKHR> topLevelStructureHandles;
1803 		std::vector<VkDeviceSize>				topBlasCompactSize;
1804 		std::vector<VkDeviceSize>				topBlasSerialSize;
1805 
1806 		topLevelAccelerationStructure								= m_data.testConfiguration->initTopAccelerationStructure(m_context, m_data, *bottomLevelAccelerationStructuresPtr);
1807 		topLevelAccelerationStructure->setBuildType					(m_data.buildType);
1808 		topLevelAccelerationStructure->setBuildFlags				(topBuildFlags);
1809 		topLevelAccelerationStructure->setBuildWithoutPrimitives	(topNoPrimitives);
1810 		topLevelAccelerationStructure->setUseArrayOfPointers		(m_data.topUsesAOP);
1811 		topLevelAccelerationStructure->setCreateGeneric				(m_data.topGeneric);
1812 		topLevelAccelerationStructure->setInactiveInstances			(inactiveInstances);
1813 		topLevelAccelerationStructure->createAndBuild				(vkd, device, *cmdBuffer, allocator);
1814 		topLevelStructureHandles.push_back							(*(topLevelAccelerationStructure->getPtr()));
1815 
1816 		if (topCompact)
1817 			queryAccelerationStructureSize(vkd, device, *cmdBuffer, topLevelStructureHandles, m_data.buildType, m_queryPoolCompact.get(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, 0u, topBlasCompactSize);
1818 		if (topSerial)
1819 			queryAccelerationStructureSize(vkd, device, *cmdBuffer, topLevelStructureHandles, m_data.buildType, m_queryPoolSerial.get(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, topBlasSerialSize);
1820 
1821 		// If the AS is built on the GPU and we are planning to make a compact copy of it or to serialize / deserialize it, we have to download the query results to the CPU.
1822 		if ((m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) && (topCompact || topSerial))
1823 		{
1824 			endCommandBuffer(vkd, *cmdBuffer);
1825 
1826 			submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
1827 
1828 			if (topCompact)
1829 				VK_CHECK(vkd.getQueryPoolResults(device, *m_queryPoolCompact, 0u, deUint32(topBlasCompactSize.size()), sizeof(VkDeviceSize) * topBlasCompactSize.size(), topBlasCompactSize.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
1830 			if (topSerial)
1831 				VK_CHECK(vkd.getQueryPoolResults(device, *m_queryPoolSerial, 0u, deUint32(topBlasSerialSize.size()), sizeof(VkDeviceSize) * topBlasSerialSize.size(), topBlasSerialSize.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
1832 
1833 			vkd.resetCommandPool(device, *cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
1834 			beginCommandBuffer(vkd, *cmdBuffer, 0u);
1835 		}
1836 
1837 		const TopLevelAccelerationStructure*			topLevelRayTracedPtr	= topLevelAccelerationStructure.get();
1838 		if (m_data.operationType != OP_NONE && m_data.operationTarget == OT_TOP_ACCELERATION)
1839 		{
1840 			switch (m_data.operationType)
1841 			{
1842 				case OP_COPY:
1843 				{
1844 					topLevelAccelerationStructureCopy = makeTopLevelAccelerationStructure();
1845 					topLevelAccelerationStructureCopy->setDeferredOperation(htCopy, workerThreadsCount);
1846 					topLevelAccelerationStructureCopy->setBuildType(m_data.buildType);
1847 					topLevelAccelerationStructureCopy->setBuildFlags(m_data.buildFlags);
1848 					topLevelAccelerationStructureCopy->setBuildWithoutPrimitives(topNoPrimitives);
1849 					topLevelAccelerationStructureCopy->setInactiveInstances(inactiveInstances);
1850 					topLevelAccelerationStructureCopy->setUseArrayOfPointers(m_data.topUsesAOP);
1851 					topLevelAccelerationStructureCopy->setCreateGeneric(m_data.topGeneric);
1852 					topLevelAccelerationStructureCopy->createAndCopyFrom(vkd, device, *cmdBuffer, allocator, topLevelAccelerationStructure.get(), 0u, 0u);
1853 					break;
1854 				}
1855 				case OP_COMPACT:
1856 				{
1857 					topLevelAccelerationStructureCopy = makeTopLevelAccelerationStructure();
1858 					topLevelAccelerationStructureCopy->setBuildType(m_data.buildType);
1859 					topLevelAccelerationStructureCopy->setBuildFlags(m_data.buildFlags);
1860 					topLevelAccelerationStructureCopy->setBuildWithoutPrimitives(topNoPrimitives);
1861 					topLevelAccelerationStructureCopy->setInactiveInstances(inactiveInstances);
1862 					topLevelAccelerationStructureCopy->setUseArrayOfPointers(m_data.topUsesAOP);
1863 					topLevelAccelerationStructureCopy->setCreateGeneric(m_data.topGeneric);
1864 					topLevelAccelerationStructureCopy->createAndCopyFrom(vkd, device, *cmdBuffer, allocator, topLevelAccelerationStructure.get(), topBlasCompactSize[0], 0u);
1865 					break;
1866 				}
1867 				case OP_SERIALIZE:
1868 				{
1869 					de::SharedPtr<SerialStorage> storage = de::SharedPtr<SerialStorage>(new SerialStorage(vkd, device, allocator, m_data.buildType, topBlasSerialSize[0]));
1870 
1871 					topLevelAccelerationStructure->setDeferredOperation(htSerialize, workerThreadsCount);
1872 					topLevelAccelerationStructure->serialize(vkd, device, *cmdBuffer, storage.get());
1873 					topSerialized.push_back(storage);
1874 
1875 					if (m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1876 					{
1877 						endCommandBuffer(vkd, *cmdBuffer);
1878 
1879 						submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
1880 
1881 						vkd.resetCommandPool(device, *cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
1882 						beginCommandBuffer(vkd, *cmdBuffer, 0u);
1883 					}
1884 
1885 					topLevelAccelerationStructureCopy = makeTopLevelAccelerationStructure();
1886 					topLevelAccelerationStructureCopy->setBuildType(m_data.buildType);
1887 					topLevelAccelerationStructureCopy->setBuildFlags(m_data.buildFlags);
1888 					topLevelAccelerationStructureCopy->setBuildWithoutPrimitives(topNoPrimitives);
1889 					topLevelAccelerationStructureCopy->setInactiveInstances(inactiveInstances);
1890 					topLevelAccelerationStructureCopy->setUseArrayOfPointers(m_data.topUsesAOP);
1891 					topLevelAccelerationStructureCopy->setCreateGeneric(m_data.topGeneric);
1892 					topLevelAccelerationStructureCopy->setDeferredOperation(htSerialize, workerThreadsCount);
1893 					topLevelAccelerationStructureCopy->createAndDeserializeFrom(vkd, device, *cmdBuffer, allocator, storage.get(), 0u);
1894 					break;
1895 				}
1896 				default:
1897 					DE_ASSERT(DE_FALSE);
1898 			}
1899 			topLevelRayTracedPtr = topLevelAccelerationStructureCopy.get();
1900 		}
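		// Likewise, when the operation targeted the top level AS, the copy is what gets bound to the
		// descriptor set and traced below.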
1901 
1902 		const VkMemoryBarrier preTraceMemoryBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
1903 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, &preTraceMemoryBarrier);
1904 
1905 		VkWriteDescriptorSetAccelerationStructureKHR	accelerationStructureWriteDescriptorSet	=
1906 		{
1907 			VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	//  VkStructureType						sType;
1908 			DE_NULL,															//  const void*							pNext;
1909 			1u,																	//  deUint32							accelerationStructureCount;
1910 			topLevelRayTracedPtr->getPtr(),										//  const VkAccelerationStructureKHR*	pAccelerationStructures;
1911 		};
1912 
1913 		DescriptorSetUpdateBuilder()
1914 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
1915 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
1916 			.update(vkd, device);
1917 
1918 		vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL);
1919 
1920 		vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
1921 
1922 		cmdTraceRays(vkd,
1923 			*cmdBuffer,
1924 			&raygenShaderBindingTableRegion,
1925 			&missShaderBindingTableRegion,
1926 			&hitShaderBindingTableRegion,
1927 			&callableShaderBindingTableRegion,
1928 			m_data.width, m_data.height, 1);
1929 
1930 		const VkMemoryBarrier				postTraceMemoryBarrier	= makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
1931 		const VkMemoryBarrier				postCopyMemoryBarrier	= makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
1932 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_TRANSFER_BIT, &postTraceMemoryBarrier);
1933 
1934 		vkd.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **resultBuffer, 1u, &resultBufferImageRegion);
1935 
1936 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &postCopyMemoryBarrier);
1937 	}
1938 	endCommandBuffer(vkd, *cmdBuffer);
1939 
1940 	submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
1941 
1942 	invalidateMappedMemoryRange(vkd, device, resultBuffer->getAllocation().getMemory(), resultBuffer->getAllocation().getOffset(), pixelCount * sizeof(deUint32));
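	// The result buffer is host-visible but not necessarily host-coherent, so the mapped range is
	// invalidated before the caller reads the rendered image back on the CPU.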
1943 
1944 	return resultBuffer;
1945 }
1946 
1947 bool RayTracingASBasicTestInstance::iterateNoWorkers (void)
1948 {
1949 	// run the test on a single thread (no worker threads)
1950 	const de::MovePtr<BufferWithMemory>	buffer		= runTest(0);
1951 
1952 	return m_data.testConfiguration->verifyImage(buffer.get(), m_context, m_data);
1953 }
1954 
1955 bool RayTracingASBasicTestInstance::iterateWithWorkers (void)
1956 {
1957 	de::MovePtr<BufferWithMemory>	singleThreadBufferCPU	= runTest(0);
1958 	const bool						singleThreadValidation	= m_data.testConfiguration->verifyImage(singleThreadBufferCPU.get(), m_context, m_data);
1959 
1960 	de::MovePtr<BufferWithMemory>	multiThreadBufferCPU	= runTest(m_data.workerThreadsCount);
1961 	const bool						multiThreadValidation	= m_data.testConfiguration->verifyImage(multiThreadBufferCPU.get(), m_context, m_data);
1962 
1963 	const bool						result					= singleThreadValidation && multiThreadValidation;
1964 
1965 	return result;
1966 }
1967 
1968 tcu::TestStatus RayTracingASBasicTestInstance::iterate (void)
1969 {
1970 	bool result;
1971 
1972 	if (m_data.workerThreadsCount != 0)
1973 		result = iterateWithWorkers();
1974 	else
1975 		result = iterateNoWorkers();
1976 
1977 	if (result)
1978 		return tcu::TestStatus::pass("Pass");
1979 	else
1980 		return tcu::TestStatus::fail("Fail");
1981 }
1982 
1983 // Tests dynamic indexing of acceleration structures
1984 class RayTracingASDynamicIndexingTestCase : public TestCase
1985 {
1986 public:
1987 						RayTracingASDynamicIndexingTestCase			(tcu::TestContext& context, const char* name);
1988 						~RayTracingASDynamicIndexingTestCase		(void) = default;
1989 
1990 	void				checkSupport								(Context& context) const override;
1991 	void				initPrograms								(SourceCollections& programCollection) const override;
1992 	TestInstance*		createInstance								(Context& context) const override;
1993 };
1994 
1995 class RayTracingASDynamicIndexingTestInstance : public TestInstance
1996 {
1997 public:
1998 						RayTracingASDynamicIndexingTestInstance		(Context& context);
1999 						~RayTracingASDynamicIndexingTestInstance	(void) = default;
2000 	tcu::TestStatus		iterate										(void) override;
2001 };
2002 
2003 RayTracingASDynamicIndexingTestCase::RayTracingASDynamicIndexingTestCase(tcu::TestContext& context, const char* name)
2004 	: TestCase(context, name, "")
2005 {
2006 }
2007 
2008 void RayTracingASDynamicIndexingTestCase::checkSupport(Context& context) const
2009 {
2010 	commonASTestsCheckSupport(context);
2011 	context.requireDeviceFunctionality("VK_EXT_descriptor_indexing");
2012 }
2013 
2014 void RayTracingASDynamicIndexingTestCase::initPrograms(SourceCollections& programCollection) const
2015 {
2016 	const vk::SpirVAsmBuildOptions spvBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, true);
2017 	const vk::ShaderBuildOptions glslBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
2018 
2019 	// The raygen shader is defined in SPIR-V assembly because it requires passing a pointer to a TLAS that was read from an SSBO;
2020 	// the original SPIR-V code was generated from the following GLSL, but the resulting SPIR-V was then modified.
2021 	//
2022 	// #version 460 core
2023 	// #extension GL_EXT_ray_tracing : require
2024 	// #extension GL_EXT_nonuniform_qualifier : enable
2025 	// #define ARRAY_SIZE 500
2026 	// layout(location = 0) rayPayloadEXT uvec2 payload;	// offset and flag indicating if we are using descriptors or pointers
2027 
2028 	// layout(set = 0, binding = 0) uniform accelerationStructureEXT tlasArray[ARRAY_SIZE];
2029 	// layout(set = 0, binding = 1) readonly buffer topLevelASPointers {
2030 	//     uvec2 ptr[];
2031 	// } tlasPointers;
2032 	// layout(set = 0, binding = 2) readonly buffer topLevelASIndices {
2033 	//     uint idx[];
2034 	// } tlasIndices;
2035 	// layout(set = 0, binding = 3, std430) writeonly buffer Result {
2036 	//     uint value[];
2037 	// } result;
2038 
2039 	// void main()
2040 	// {
2041 	//   float tmin            = 0.0;
2042 	//   float tmax            = 2.0;
2043 	//   vec3  origin          = vec3(0.25f, 0.5f, 1.0);
2044 	//   vec3  direction       = vec3(0.0,0.0,-1.0);
2045 	//   uint  activeTlasIndex = gl_LaunchIDEXT.x;
2046 	//   uint  activeTlasCount = gl_LaunchSizeEXT.x;
2047 	//   uint  tlasIndex       = tlasIndices.idx[nonuniformEXT(activeTlasIndex)];
2048 
2049 	//   atomicAdd(result.value[nonuniformEXT(activeTlasIndex)], 2);
2050 	//   payload = uvec2(activeTlasIndex + activeTlasCount, 0);
2051 	//   traceRayEXT(tlasArray[nonuniformEXT(tlasIndex)], gl_RayFlagsCullBackFacingTrianglesEXT, 0xFF, 0, 0, 0, origin, tmin, direction, tmax, 0);
2052 
2053 	//   atomicAdd(result.value[nonuniformEXT(activeTlasIndex + activeTlasCount * 2)], 5);
2054 	//   payload = uvec2(activeTlasIndex + activeTlasCount * 3, 1);
2055 	//   traceRayEXT(tlasArray[nonuniformEXT(tlasIndex)], gl_RayFlagsCullBackFacingTrianglesEXT, 0xFF, 0, 0, 0, origin, tmin, direction, tmax, 0);				// used to generate initial spirv
2056 	//   //traceRayEXT(*tlasPointers.ptr[nonuniformEXT(tlasIndex)], gl_RayFlagsCullBackFacingTrianglesEXT, 0xFF, 0, 0, 0, origin, tmin, direction, tmax, 0);	// not available in glsl but should be done in spirv
2057 	// };
2058 
2059 	const std::string rgenSource =
2060 		"OpCapability RayTracingKHR\n"
2061 		"OpCapability ShaderNonUniform\n"
2062 		"OpExtension \"SPV_EXT_descriptor_indexing\"\n"
2063 		"OpExtension \"SPV_KHR_ray_tracing\"\n"
2064 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
2065 		"OpMemoryModel Logical GLSL450\n"
2066 		"OpEntryPoint RayGenerationKHR %4 \"main\" %27 %33 %var_tlas_indices %var_result %60 %var_as_arr_ptr %var_as_pointers_ssbo\n"
2067 		"OpDecorate %27 BuiltIn LaunchIdNV\n"
2068 		"OpDecorate %33 BuiltIn LaunchSizeNV\n"
2069 		"OpDecorate %37 ArrayStride 4\n"
2070 		"OpMemberDecorate %38 0 NonWritable\n"
2071 		"OpMemberDecorate %38 0 Offset 0\n"
2072 		"OpDecorate %38 Block\n"
2073 		"OpDecorate %var_tlas_indices DescriptorSet 0\n"
2074 		"OpDecorate %var_tlas_indices Binding 2\n"
2075 		"OpDecorate %44 NonUniform\n"
2076 		"OpDecorate %46 NonUniform\n"
2077 		"OpDecorate %47 NonUniform\n"
2078 		"OpDecorate %48 ArrayStride 4\n"
2079 		"OpMemberDecorate %49 0 NonReadable\n"
2080 		"OpMemberDecorate %49 0 Offset 0\n"
2081 		"OpDecorate %49 Block\n"
2082 		"OpDecorate %var_result DescriptorSet 0\n"
2083 		"OpDecorate %var_result Binding 3\n"
2084 		"OpDecorate %53 NonUniform\n"
2085 		"OpDecorate %60 Location 0\n"
2086 		"OpDecorate %var_as_arr_ptr DescriptorSet 0\n"
2087 		"OpDecorate %var_as_arr_ptr Binding 0\n"
2088 		"OpDecorate %71 NonUniform\n"
2089 		"OpDecorate %73 NonUniform\n"
2090 		"OpDecorate %74 NonUniform\n"
2091 		"OpDecorate %85 NonUniform\n"
2092 		"OpDecorate %as_index NonUniform\n"
2093 		"OpDecorate %as_device_addres NonUniform\n"
2094 		"OpDecorate %104 ArrayStride 8\n"
2095 		"OpMemberDecorate %105 0 NonWritable\n"
2096 		"OpMemberDecorate %105 0 Offset 0\n"
2097 		"OpDecorate %105 Block\n"
2098 		"OpDecorate %var_as_pointers_ssbo DescriptorSet 0\n"
2099 		"OpDecorate %var_as_pointers_ssbo Binding 1\n"
2100 		// types, constants and variables
2101 		"%2								= OpTypeVoid\n"
2102 		"%3								= OpTypeFunction %2\n"
2103 		"%6								= OpTypeFloat 32\n"
2104 		"%7								= OpTypePointer Function %6\n"
2105 		"%9								= OpConstant %6 0\n"
2106 		"%11							= OpConstant %6 2\n"
2107 		"%12							= OpTypeVector %6 3\n"
2108 		"%13							= OpTypePointer Function %12\n"
2109 		"%15							= OpConstant %6 0.25\n"
2110 		"%16							= OpConstant %6 0.5\n"
2111 		"%17							= OpConstant %6 1\n"
2112 		"%18							= OpConstantComposite %12 %15 %16 %17\n"
2113 		"%20							= OpConstant %6 -1\n"
2114 		"%21							= OpConstantComposite %12 %9 %9 %20\n"
2115 		"%type_uint32					= OpTypeInt 32 0\n"
2116 		"%23							= OpTypePointer Function %type_uint32\n"
2117 		"%25							= OpTypeVector %type_uint32 3\n"
2118 		"%26							= OpTypePointer Input %25\n"
2119 		"%27							= OpVariable %26 Input\n"
2120 		"%28							= OpConstant %type_uint32 0\n"
2121 		"%29							= OpTypePointer Input %type_uint32\n"
2122 		"%33							= OpVariable %26 Input\n"
2123 		"%37							= OpTypeRuntimeArray %type_uint32\n"
2124 		"%38							= OpTypeStruct %37\n"
2125 		"%39							= OpTypePointer StorageBuffer %38\n"
2126 		"%var_tlas_indices				= OpVariable %39 StorageBuffer\n"
2127 		"%type_int32					= OpTypeInt 32 1\n"
2128 		"%c_int32_0						= OpConstant %type_int32 0\n"
2129 		"%45							= OpTypePointer StorageBuffer %type_uint32\n"
2130 		"%48							= OpTypeRuntimeArray %type_uint32\n"
2131 		"%49							= OpTypeStruct %48\n"
2132 		"%50							= OpTypePointer StorageBuffer %49\n"
2133 		"%var_result					= OpVariable %50 StorageBuffer\n"
2134 		"%55							= OpConstant %type_uint32 2\n"
2135 		"%56							= OpConstant %type_uint32 1\n"
2136 		"%58							= OpTypeVector %type_uint32 2\n"
2137 		"%59							= OpTypePointer RayPayloadNV %58\n"
2138 		"%60							= OpVariable %59 RayPayloadNV\n"
2139 		"%type_as						= OpTypeAccelerationStructureKHR\n"
2140 		"%66							= OpConstant %type_uint32 500\n"
2141 		"%67							= OpTypeArray %type_as %66\n"
2142 		"%68							= OpTypePointer UniformConstant %67\n"
2143 		"%var_as_arr_ptr				= OpVariable %68 UniformConstant\n"
2144 		"%72							= OpTypePointer UniformConstant %type_as\n"
2145 		"%75							= OpConstant %type_uint32 16\n"
2146 		"%76							= OpConstant %type_uint32 255\n"
2147 		"%87							= OpConstant %type_uint32 5\n"
2148 		"%91							= OpConstant %type_uint32 3\n"
2149 
2150 		// <changed_section>
2151 		"%104							= OpTypeRuntimeArray %58\n"
2152 		"%105							= OpTypeStruct %104\n"
2153 		"%106							= OpTypePointer StorageBuffer %105\n"
2154 		"%var_as_pointers_ssbo			= OpVariable %106 StorageBuffer\n"
2155 		"%type_uint64_ssbo_ptr			= OpTypePointer StorageBuffer %58\n"
2156 		// </changed_section>
2157 
2158 		// void main()
2159 		"%4								= OpFunction %2 None %3\n"
2160 		"%5								= OpLabel\n"
2161 		"%8								= OpVariable %7 Function\n"
2162 		"%10							= OpVariable %7 Function\n"
2163 		"%14							= OpVariable %13 Function\n"
2164 		"%19							= OpVariable %13 Function\n"
2165 		"%24							= OpVariable %23 Function\n"
2166 		"%32							= OpVariable %23 Function\n"
2167 		"%36							= OpVariable %23 Function\n"
2168 		"OpStore %8 %9\n"
2169 		"OpStore %10 %11\n"
2170 		"OpStore %14 %18\n"
2171 		"OpStore %19 %21\n"
2172 		"%30							= OpAccessChain %29 %27 %28\n"
2173 		"%31							= OpLoad %type_uint32 %30\n"
2174 		"OpStore %24 %31\n"
2175 		"%34							= OpAccessChain %29 %33 %28\n"
2176 		"%35							= OpLoad %type_uint32 %34\n"
2177 		"OpStore %32 %35\n"
2178 		"%43							= OpLoad %type_uint32 %24\n"
2179 		"%44							= OpCopyObject %type_uint32 %43\n"
2180 		"%46							= OpAccessChain %45 %var_tlas_indices %c_int32_0 %44\n"
2181 		"%47							= OpLoad %type_uint32 %46\n"
2182 		"OpStore %36 %47\n"
2183 		// atomicAdd
2184 		"%52							= OpLoad %type_uint32 %24\n"
2185 		"%53							= OpCopyObject %type_uint32 %52\n"
2186 		"%54							= OpAccessChain %45 %var_result %c_int32_0 %53\n"
2187 		"%57							= OpAtomicIAdd %type_uint32 %54 %56 %28 %55\n"
2188 		// setup payload
2189 		"%61							= OpLoad %type_uint32 %24\n"
2190 		"%62							= OpLoad %type_uint32 %32\n"
2191 		"%63							= OpIAdd %type_uint32 %61 %62\n"
2192 		"%64							= OpCompositeConstruct %58 %63 %28\n"
2193 		"OpStore %60 %64\n"
2194 		// trace rays using tlas from array
2195 		"%70							= OpLoad %type_uint32 %36\n"
2196 		"%71							= OpCopyObject %type_uint32 %70\n"
2197 		"%73							= OpAccessChain %72 %var_as_arr_ptr %71\n"
2198 		"%74							= OpLoad %type_as %73\n"
2199 		"%77							= OpLoad %12 %14\n"
2200 		"%78							= OpLoad %6 %8\n"
2201 		"%79							= OpLoad %12 %19\n"
2202 		"%80							= OpLoad %6 %10\n"
2203 		"OpTraceRayKHR %74 %75 %76 %28 %28 %28 %77 %78 %79 %80 %60\n"
2204 		// atomicAdd
2205 		"%81							= OpLoad %type_uint32 %24\n"
2206 		"%82							= OpLoad %type_uint32 %32\n"
2207 		"%83							= OpIMul %type_uint32 %82 %55\n"
2208 		"%84							= OpIAdd %type_uint32 %81 %83\n"
2209 		"%85							= OpCopyObject %type_uint32 %84\n"
2210 		"%86							= OpAccessChain %45 %var_result %c_int32_0 %85\n"
2211 		"%88							= OpAtomicIAdd %type_uint32 %86 %56 %28 %87\n"
2212 		// setup payload
2213 		"%89							= OpLoad %type_uint32 %24\n"
2214 		"%90							= OpLoad %type_uint32 %32\n"
2215 		"%92							= OpIMul %type_uint32 %90 %91\n"
2216 		"%93							= OpIAdd %type_uint32 %89 %92\n"
2217 		"%94							= OpCompositeConstruct %58 %93 %56\n"
2218 		"OpStore %60 %94\n"
2219 		// trace rays using pointers to tlas
2220 		"%95							= OpLoad %type_uint32 %36\n"
2221 		"%as_index						= OpCopyObject %type_uint32 %95\n"
2222 
2223 		// <changed_section> OLD
2224 		"%as_device_addres_ptr			= OpAccessChain %type_uint64_ssbo_ptr %var_as_pointers_ssbo %c_int32_0 %as_index\n"
2225 		"%as_device_addres				= OpLoad %58 %as_device_addres_ptr\n"
2226 		"%as_to_use						= OpConvertUToAccelerationStructureKHR %type_as %as_device_addres\n"
2227 		// </changed_section>
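		// The uvec2 loaded from the SSBO holds the 64-bit TLAS device address;
		// OpConvertUToAccelerationStructureKHR turns it into a handle usable by OpTraceRayKHR below.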
2228 
2229 		"%99							= OpLoad %12 %14\n"
2230 		"%100							= OpLoad %6 %8\n"
2231 		"%101							= OpLoad %12 %19\n"
2232 		"%102							= OpLoad %6 %10\n"
2233 		"OpTraceRayKHR %as_to_use %75 %76 %28 %28 %28 %99 %100 %101 %102 %60\n"
2234 		"OpReturn\n"
2235 		"OpFunctionEnd\n";
2236 	programCollection.spirvAsmSources.add("rgen") << rgenSource << spvBuildOptions;
2237 
2238 	std::string chitSource =
2239 		"#version 460 core\n"
2240 		"#extension GL_EXT_ray_tracing : require\n"
2241 		"#extension GL_EXT_nonuniform_qualifier : enable\n"
2242 		"layout(location = 0) rayPayloadInEXT uvec2 payload;\n"
2243 		"\n"
2244 		"layout(set = 0, binding = 3) writeonly buffer Result {\n"
2245 		"    uint value[];\n"
2246 		"} result;\n"
2247 		"void main()\n"
2248 		"{\n"
2249 		     // payload.y is 0 or 1 so we will add 3 or 7 (just two prime numbers)
2250 		"    atomicAdd(result.value[nonuniformEXT(payload.x)], 3 + payload.y * 4);\n"
2251 		"}\n";
2252 	programCollection.glslSources.add("chit") << glu::ClosestHitSource(chitSource) << glslBuildOptions;
2253 }
2254 
2255 TestInstance* RayTracingASDynamicIndexingTestCase::createInstance(Context& context) const
2256 {
2257 	return new RayTracingASDynamicIndexingTestInstance(context);
2258 }
2259 
2260 RayTracingASDynamicIndexingTestInstance::RayTracingASDynamicIndexingTestInstance(Context& context)
2261 	: vkt::TestInstance(context)
2262 {
2263 }
2264 
2265 tcu::TestStatus RayTracingASDynamicIndexingTestInstance::iterate(void)
2266 {
2267 	const InstanceInterface&	vki							= m_context.getInstanceInterface();
2268 	const DeviceInterface&		vkd							= m_context.getDeviceInterface();
2269 	const VkDevice				device						= m_context.getDevice();
2270 	const VkPhysicalDevice		physicalDevice				= m_context.getPhysicalDevice();
2271 	const deUint32				queueFamilyIndex			= m_context.getUniversalQueueFamilyIndex();
2272 	const VkQueue				queue						= m_context.getUniversalQueue();
2273 	Allocator&					allocator					= m_context.getDefaultAllocator();
2274 	const deUint32				shaderGroupHandleSize		= getShaderGroupSize(vki, physicalDevice);
2275 	const deUint32				shaderGroupBaseAlignment	= getShaderGroupBaseAlignment(vki, physicalDevice);
2276 	const deUint32				tlasCount					= 500;	// changing this also requires changing the shaders (the rgen shader declares tlasArray with 500 elements)
2277 	const deUint32				activeTlasCount				= 32;	// number of tlas out of <tlasCount> that will be active
2278 
2279 	const Move<VkDescriptorSetLayout> descriptorSetLayout = DescriptorSetLayoutBuilder()
2280 		.addArrayBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, tlasCount, ALL_RAY_TRACING_STAGES)
2281 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, ALL_RAY_TRACING_STAGES)				// pointers to all acceleration structures
2282 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, ALL_RAY_TRACING_STAGES)				// ssbo with indices of all acceleration structures
2283 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, ALL_RAY_TRACING_STAGES)				// ssbo with result values
2284 		.build(vkd, device);
2285 
2286 	const Move<VkDescriptorPool> descriptorPool = DescriptorPoolBuilder()
2287 		.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, tlasCount)
2288 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2289 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2290 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2291 		.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2292 	const Move<VkDescriptorSet> descriptorSet = makeDescriptorSet(vkd, device, *descriptorPool, *descriptorSetLayout);
2293 
2294 	de::MovePtr<RayTracingPipeline> rayTracingPipeline = de::newMovePtr<RayTracingPipeline>();
2295 	rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,      createShaderModule(vkd, device, m_context.getBinaryCollection().get("rgen"), 0), 0);
2296 	rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, createShaderModule(vkd, device, m_context.getBinaryCollection().get("chit"), 0), 1);
2297 
2298 	const Move<VkPipelineLayout>			pipelineLayout						= makePipelineLayout(vkd, device, descriptorSetLayout.get());
2299 	Move<VkPipeline>						pipeline							= rayTracingPipeline->createPipeline(vkd, device, *pipelineLayout);
2300 	de::MovePtr<BufferWithMemory>			raygenShaderBindingTable			= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1);
2301 	de::MovePtr<BufferWithMemory>			hitShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1);
2302 
2303 	const VkStridedDeviceAddressRegionKHR	raygenShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, raygenShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
2304 	const VkStridedDeviceAddressRegionKHR	missShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
2305 	const VkStridedDeviceAddressRegionKHR	hitShaderBindingTableRegion			= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
2306 	const VkStridedDeviceAddressRegionKHR	callableShaderBindingTableRegion	= makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
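	// Only raygen and closest hit groups exist in this pipeline, so the miss and callable SBT regions are
	// deliberately left null.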
2307 
2308 	const VkDeviceSize						pointerBufferSize		= tlasCount * sizeof(VkDeviceAddress);
2309 	const VkBufferCreateInfo				pointerBufferCreateInfo	= makeBufferCreateInfo(pointerBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2310 	de::MovePtr<BufferWithMemory>			pointerBuffer			= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, pointerBufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::DeviceAddress));
2311 
2312 	const VkDeviceSize						indicesBufferSize		= activeTlasCount * sizeof(deUint32);
2313 	const VkBufferCreateInfo				indicesBufferCreateInfo	= makeBufferCreateInfo(indicesBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2314 	de::MovePtr<BufferWithMemory>			indicesBuffer			= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, indicesBufferCreateInfo, MemoryRequirement::HostVisible));
2315 
2316 	const VkDeviceSize						resultBufferSize		= activeTlasCount * sizeof(deUint32) * 4;
2317 	const VkBufferCreateInfo				resultBufferCreateInfo	= makeBufferCreateInfo(resultBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
2318 	de::MovePtr<BufferWithMemory>			resultBuffer			= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible));
2319 
2320 	const Move<VkCommandPool>				cmdPool					= createCommandPool(vkd, device, 0, queueFamilyIndex);
2321 	const Move<VkCommandBuffer>				cmdBuffer				= allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2322 
2323 	de::SharedPtr<BottomLevelAccelerationStructure>				blas = de::SharedPtr<BottomLevelAccelerationStructure>(makeBottomLevelAccelerationStructure().release());
2324 	std::vector<de::MovePtr<TopLevelAccelerationStructure>>		tlasVect(tlasCount);
2325 	std::vector<VkDeviceAddress>								tlasPtrVect(tlasCount);
2326 	std::vector<VkAccelerationStructureKHR>						tlasVkVect;
2327 
2328 	// randomly scatter active AS across the range
2329 	deRandom rnd;
2330 	deRandom_init(&rnd, 123);
2331 	std::set<deUint32> asIndicesSet;
2332 	while (asIndicesSet.size() < activeTlasCount)
2333 		asIndicesSet.insert(deRandom_getUint32(&rnd) % tlasCount);
2334 
2335 	// fill indices buffer
2336 	deUint32 helperIndex = 0;
2337 	auto& indicesBufferAlloc	= indicesBuffer->getAllocation();
2338 	deUint32* indicesBufferPtr	= reinterpret_cast<deUint32*>(indicesBufferAlloc.getHostPtr());
2339 	std::for_each(asIndicesSet.begin(), asIndicesSet.end(),
2340 		[&helperIndex, indicesBufferPtr](const deUint32& index)
2341 		{
2342 			indicesBufferPtr[helperIndex++] = index;
2343 		});
2344 	vk::flushAlloc(vkd, device, indicesBufferAlloc);
2345 
2346 	// clear result buffer
2347 	auto& resultBufferAlloc		= resultBuffer->getAllocation();
2348 	void* resultBufferPtr		= resultBufferAlloc.getHostPtr();
2349 	deMemset(resultBufferPtr, 0, static_cast<size_t>(resultBufferSize));
2350 	vk::flushAlloc(vkd, device, resultBufferAlloc);
2351 
2352 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
2353 	{
2354 		// build bottom level acceleration structure
2355 		blas->setGeometryData(
2356 			{
2357 				{ 0.0, 0.0, 0.0 },
2358 				{ 1.0, 0.0, 0.0 },
2359 				{ 0.0, 1.0, 0.0 },
2360 			},
2361 			true,
2362 			VK_GEOMETRY_OPAQUE_BIT_KHR
2363 		);
2364 
2365 		blas->createAndBuild(vkd, device, *cmdBuffer, allocator);
2366 
2367 		// build top level acceleration structures
2368 		for (deUint32 tlasIndex = 0; tlasIndex < tlasCount; ++tlasIndex)
2369 		{
2370 			auto& tlas = tlasVect[tlasIndex];
2371 			tlas = makeTopLevelAccelerationStructure();
2372 			tlas->setInstanceCount(1);
2373 			tlas->addInstance(blas);
2374 			if (!asIndicesSet.count(tlasIndex))
2375 			{
2376 				// TLASes that are not in asIndicesSet should be empty, but that is hard to
2377 				// achieve with the current CTS utilities, so we mark them as inactive instead.
2378 				tlas->setInactiveInstances(true);
2379 			}
2380 			tlas->createAndBuild(vkd, device, *cmdBuffer, allocator);
2381 
2382 			// get acceleration structure device address
2383 			const VkAccelerationStructureDeviceAddressInfoKHR addressInfo =
2384 			{
2385 				VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,	// VkStructureType				sType
2386 				DE_NULL,															// const void*					pNext
2387 				*tlas->getPtr()														// VkAccelerationStructureKHR	accelerationStructure
2388 			};
2389 			VkDeviceAddress vkda = vkd.getAccelerationStructureDeviceAddressKHR(device, &addressInfo);
2390 			tlasPtrVect[tlasIndex] = vkda;
2391 		}
2392 
2393 		// fill pointer buffer
2394 		vkd.cmdUpdateBuffer(*cmdBuffer, **pointerBuffer, 0, pointerBufferSize, tlasPtrVect.data());
2395 
2396 		// wait for data transfers
2397 		const VkMemoryBarrier bufferUploadBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
2398 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, &bufferUploadBarrier, 1u);
2399 
2400 		// wait for as build
2401 		const VkMemoryBarrier asBuildBarrier = makeMemoryBarrier(VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR);
2402 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, &asBuildBarrier, 1u);
2403 
2404 		tlasVkVect.reserve(tlasCount);
2405 		for (auto& tlas : tlasVect)
2406 			tlasVkVect.push_back(*tlas->getPtr());
2407 
2408 		VkWriteDescriptorSetAccelerationStructureKHR accelerationStructureWriteDescriptorSet =
2409 		{
2410 			VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	// VkStructureType						sType;
2411 			DE_NULL,															// const void*							pNext;
2412 			tlasCount,															// deUint32								accelerationStructureCount;
2413 			tlasVkVect.data(),													// const VkAccelerationStructureKHR*	pAccelerationStructures;
2414 		};
2415 
2416 		const vk::VkDescriptorBufferInfo pointerBufferInfo	= makeDescriptorBufferInfo(**pointerBuffer, 0u, VK_WHOLE_SIZE);
2417 		const vk::VkDescriptorBufferInfo indicesBufferInfo	= makeDescriptorBufferInfo(**indicesBuffer, 0u, VK_WHOLE_SIZE);
2418 		const vk::VkDescriptorBufferInfo resultInfo			= makeDescriptorBufferInfo(**resultBuffer,  0u, VK_WHOLE_SIZE);
2419 
2420 		DescriptorSetUpdateBuilder()
2421 			.writeArray (*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, tlasCount, &accelerationStructureWriteDescriptorSet)
2422 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &pointerBufferInfo)
2423 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indicesBufferInfo)
2424 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(3u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultInfo)
2425 			.update(vkd, device);
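		// The matching shader-side interface is declared in the shader sources built elsewhere in
		// this file. As an illustrative sketch only (an assumption, not the exact CTS shader code),
		// the raygen shader is expected to use bindings along these lines:
		//   layout(set = 0, binding = 0) uniform accelerationStructureEXT tlases[];  // tlasCount entries
		//   layout(set = 0, binding = 1) buffer Pointers { uint64_t ptr[]; };        // TLAS device addresses
		//   layout(set = 0, binding = 2) buffer Indices  { uint     idx[]; };        // indices of active TLASes
		//   layout(set = 0, binding = 3) buffer Results  { uint     val[]; };        // values checked after the trace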
2426 
2427 		vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL);
2428 
2429 		vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
2430 
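		// Launch one raygen invocation per active TLAS; the dispatch width therefore matches
		// the number of entries written to the indices buffer above.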
2431 		cmdTraceRays(vkd,
2432 			*cmdBuffer,
2433 			&raygenShaderBindingTableRegion,
2434 			&missShaderBindingTableRegion,
2435 			&hitShaderBindingTableRegion,
2436 			&callableShaderBindingTableRegion,
2437 			activeTlasCount, 1, 1);
2438 
2439 		const VkMemoryBarrier postTraceMemoryBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
2440 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_TRANSFER_BIT, &postTraceMemoryBarrier);
2441 	}
2442 	endCommandBuffer(vkd, *cmdBuffer);
2443 
2444 	submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
2445 
2446 	invalidateMappedMemoryRange(vkd, device, resultBuffer->getAllocation().getMemory(), resultBuffer->getAllocation().getOffset(), resultBufferSize);
2447 
2448 	// verify result buffer
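	// Layout: four deUint32 values per active TLAS, stored plane by plane
	// (plane k at resultPtr[k * activeTlasCount + index]); the shaders are expected
	// to produce the constants 2, 3, 5 and 7, one per plane.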
2449 	deUint32		failures	= 0;
2450 	const deUint32*	resultPtr	= reinterpret_cast<deUint32*>(resultBuffer->getAllocation().getHostPtr());
2451 	for (deUint32 index = 0; index < activeTlasCount; ++index)
2452 	{
2453 		failures += (resultPtr[0 * activeTlasCount + index] != 2) +
2454 					(resultPtr[1 * activeTlasCount + index] != 3) +
2455 					(resultPtr[2 * activeTlasCount + index] != 5) +
2456 					(resultPtr[3 * activeTlasCount + index] != 7);
2457 	}
2458 
2459 	if (failures)
2460 		return tcu::TestStatus::fail(de::toString(failures) + " failures, " + de::toString(4 * activeTlasCount - failures) + " are ok");
2461 	return tcu::TestStatus::pass("Pass");
2462 }
2463 
2464 // Tests the vkGetDeviceAccelerationStructureCompatibilityKHR routine
2465 class RayTracingDeviceASCompabilityKHRTestInstance : public TestInstance
2466 {
2467 public:
2468 					RayTracingDeviceASCompabilityKHRTestInstance	(Context& context, const de::SharedPtr<TestParams> params)
2469 						: TestInstance	(context)
2470 						, m_params		(params)
2471 					{
2472 					}
2473 
2474 	tcu::TestStatus	iterate											(void) override;
2475 
2476 protected:
2477 	template<class ASType>
2478 		bool		performTest										(VkCommandPool								cmdPool,
2479 																	 VkCommandBuffer							cmdBuffer,
2480 																	 const std::vector<de::SharedPtr<ASType>>	sourceStructures,
2481 																	 const std::vector<VkDeviceSize>&			copySizes,
2482 																	 const std::vector<VkDeviceSize>&			compactSizes);
2483 
2484 	VkAccelerationStructureCompatibilityKHR
2485 					getDeviceASCompatibilityKHR						(const deUint8*		versionInfoData);
2486 	std::string		getUUIDsString									(const deUint8* header) const;
2487 
2488 
2489 private:
2490 	const de::SharedPtr<TestParams>	m_params;
2491 };
2492 
2493 // Tests for updating bottom-level AS address(es) in the top-level AS's header
2494 class RayTracingHeaderBottomAddressTestInstance : public TestInstance
2495 {
2496 public:
2497 					RayTracingHeaderBottomAddressTestInstance						(Context&											context,
2498 																					 const de::SharedPtr<TestParams>					params)
2499 						: TestInstance	(context)
2500 						, m_params		(params)
2501 					{
2502 					}
2503 	tcu::TestStatus	iterate															(void) override;
2504 
2505 protected:
2506 	de::SharedPtr<TopLevelAccelerationStructure>	prepareTopAccelerationStructure	(const DeviceInterface&								vk,
2507 																					 VkDevice											device,
2508 																					 Allocator&											allocator,
2509 																					 VkCommandBuffer									cmdBuffer);
2510 
2511 	bool											areAddressesTheSame				(const std::vector<deUint64>&						addresses,
2512 																					 const SerialStorage::AccelerationStructureHeader*	header);
2513 
2514 	bool											areAddressesDifferent			(const std::vector<deUint64>&						addresses1,
2515 																					 const std::vector<deUint64>&						addresses2);
2516 private:
2517 	const de::SharedPtr<TestParams>	m_params;
2518 };
2519 
2520 class RayTracingDeviceASCompabilityKHRTestCase : public TestCase
2521 {
2522 public:
2523 					RayTracingDeviceASCompabilityKHRTestCase	(tcu::TestContext& ctx, const char* name, const de::SharedPtr<TestParams> params)
2524 						: TestCase(ctx, name, std::string())
2525 						, m_params(params)
2526 					{
2527 					}
2528 
2529 	void			checkSupport								(Context&			context) const override;
2530 	TestInstance*	createInstance								(Context&			context) const override
2531 	{
2532 		return new RayTracingDeviceASCompabilityKHRTestInstance(context, m_params);
2533 	}
2534 
2535 private:
2536 	de::SharedPtr<TestParams>	m_params;
2537 };
2538 
2539 class RayTracingHeaderBottomAddressTestCase : public TestCase
2540 {
2541 public:
2542 					RayTracingHeaderBottomAddressTestCase	(tcu::TestContext& ctx, const char* name, const de::SharedPtr<TestParams> params)
2543 						: TestCase(ctx, name, std::string())
2544 						, m_params(params)
2545 					{
2546 					}
2547 
2548 	void			checkSupport								(Context&			context) const override;
2549 	TestInstance*	createInstance								(Context&			context) const override
2550 	{
2551 		return new RayTracingHeaderBottomAddressTestInstance(context, m_params);
2552 	}
2553 
2554 private:
2555 	de::SharedPtr<TestParams>	m_params;
2556 };
2557 
2558 void RayTracingDeviceASCompabilityKHRTestCase::checkSupport (Context& context) const
2559 {
2560 	context.requireInstanceFunctionality("VK_KHR_get_physical_device_properties2");
2561 	context.requireDeviceFunctionality("VK_KHR_acceleration_structure");
2562 
2563 	const VkPhysicalDeviceAccelerationStructureFeaturesKHR&	accelerationStructureFeaturesKHR = context.getAccelerationStructureFeatures();
2564 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR && accelerationStructureFeaturesKHR.accelerationStructureHostCommands == DE_FALSE)
2565 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructureHostCommands");
2566 
2567 	// Check supported vertex format.
2568 	checkAccelerationStructureVertexBufferFormat(context.getInstanceInterface(), context.getPhysicalDevice(), m_params->vertexFormat);
2569 }
2570 
2571 void RayTracingHeaderBottomAddressTestCase::checkSupport (Context& context) const
2572 {
2573 	context.requireDeviceFunctionality("VK_KHR_acceleration_structure");
2574 
2575 	const VkPhysicalDeviceAccelerationStructureFeaturesKHR&	accelerationStructureFeaturesKHR = context.getAccelerationStructureFeatures();
2576 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR && accelerationStructureFeaturesKHR.accelerationStructureHostCommands == DE_FALSE)
2577 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructureHostCommands");
2578 
2579 	// Check supported vertex format.
2580 	checkAccelerationStructureVertexBufferFormat(context.getInstanceInterface(), context.getPhysicalDevice(), m_params->vertexFormat);
2581 }
2582 
2583 VkAccelerationStructureCompatibilityKHR	RayTracingDeviceASCompabilityKHRTestInstance::getDeviceASCompatibilityKHR (const deUint8* versionInfoData)
2584 {
2585 	const VkDevice								device		= m_context.getDevice();
2586 	const DeviceInterface&						vkd			= m_context.getDeviceInterface();
2587 
2588 	VkAccelerationStructureCompatibilityKHR		compability = VK_ACCELERATION_STRUCTURE_COMPATIBILITY_MAX_ENUM_KHR;
2589 
2590 	const VkAccelerationStructureVersionInfoKHR versionInfo =
2591 	{
2592 		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_VERSION_INFO_KHR,	// sType
2593 		DE_NULL,													// pNext
2594 		versionInfoData												// pVersionData
2595 	};
2596 
2597 	vkd.getDeviceAccelerationStructureCompatibilityKHR(device, &versionInfo, &compability);
2598 
2599 	return compability;
2600 }
2601 
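// The first 2 * VK_UUID_SIZE bytes of a serialized acceleration structure hold the driver UUID
// followed by the acceleration-structure compatibility UUID. The widths {4, 2, 2, 2, 6} below group
// each 16-byte UUID in the usual 8-4-4-4-12 hex layout; bytes are not zero-padded, so the resulting
// string is intended for logging only.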
2602 std::string RayTracingDeviceASCompabilityKHRTestInstance::getUUIDsString (const deUint8* header) const
2603 {
2604 	std::stringstream		ss;
2605 
2606 	int			offset		= 0;
2607 	const int	widths[]	= { 4, 2, 2, 2, 6 };
2608 
2609 	for (int h = 0; h < 2; ++h)
2610 	{
2611 		if (h) ss << ' ';
2612 
2613 		for (int w = 0; w < DE_LENGTH_OF_ARRAY(widths); ++w)
2614 		{
2615 			if (w) ss << '-';
2616 
2617 			for (int i = 0; i < widths[w]; ++i)
2618 				ss << std::hex << std::uppercase << static_cast<int>(header[i + offset]);
2619 
2620 			offset += widths[w];
2621 		}
2622 	}
2623 
2624 	return ss.str();
2625 }
2626 
2627 tcu::TestStatus RayTracingDeviceASCompabilityKHRTestInstance::iterate (void)
2628 {
2629 	const DeviceInterface&			vkd					= m_context.getDeviceInterface();
2630 	const VkDevice					device				= m_context.getDevice();
2631 	const deUint32					queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
2632 	const VkQueue					queue				= m_context.getUniversalQueue();
2633 	Allocator&						allocator			= m_context.getDefaultAllocator();
2634 
2635 	const Move<VkCommandPool>		cmdPool				= createCommandPool(vkd, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
2636 	const Move<VkCommandBuffer>		cmdBuffer			= allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2637 
2638 	bool							result				= false;
2639 
2640 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	bottomStructures;
2641 	std::vector<VkAccelerationStructureKHR>							bottomHandles;
2642 	std::vector<de::SharedPtr<TopLevelAccelerationStructure>>		topStructures;
2643 	std::vector<VkAccelerationStructureKHR>							topHandles;
2644 	Move<VkQueryPool>												queryPoolCompact;
2645 	Move<VkQueryPool>												queryPoolSerial;
2646 	std::vector<VkDeviceSize>										compactSizes;
2647 	std::vector<VkDeviceSize>										serialSizes;
2648 
2649 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
2650 
2651 	bottomStructures = m_params->testConfiguration->initBottomAccelerationStructures(m_context, *m_params);
2652 	for (auto& blas : bottomStructures)
2653 	{
2654 		blas->setBuildType(m_params->buildType);
2655 		blas->setBuildFlags(VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR);
2656 		blas->createAndBuild(vkd, device, *cmdBuffer, allocator);
2657 		bottomHandles.push_back(*(blas->getPtr()));
2658 	}
2659 
2660 	if (m_params->operationTarget == OT_TOP_ACCELERATION)
2661 	{
2662 		de::MovePtr<TopLevelAccelerationStructure> tlas = m_params->testConfiguration->initTopAccelerationStructure(m_context, *m_params, bottomStructures);
2663 		tlas->setBuildType					(m_params->buildType);
2664 		tlas->setBuildFlags				(VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR);
2665 		tlas->createAndBuild				(vkd, device, *cmdBuffer, allocator);
2666 		topHandles.push_back							(*(tlas->getPtr()));
2667 		topStructures.push_back(de::SharedPtr<TopLevelAccelerationStructure>(tlas.release()));
2668 	}
2669 
2670 	const deUint32 queryCount = deUint32((m_params->operationTarget == OT_BOTTOM_ACCELERATION) ? bottomStructures.size() : topStructures.size());
2671 	const std::vector<VkAccelerationStructureKHR>& handles = (m_params->operationTarget == OT_BOTTOM_ACCELERATION) ? bottomHandles : topHandles;
2672 
2673 	// query compact size
2674 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2675 		queryPoolCompact = makeQueryPool(vkd, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, queryCount);
2676 	queryAccelerationStructureSize(vkd, device, *cmdBuffer, handles, m_params->buildType, *queryPoolCompact, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, 0u, compactSizes);
2677 
2678 	// query serialization size
2679 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2680 		queryPoolSerial = makeQueryPool(vkd, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, queryCount);
2681 	queryAccelerationStructureSize(vkd, device, *cmdBuffer, handles, m_params->buildType, queryPoolSerial.get(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, serialSizes);
2682 
2683 	endCommandBuffer(vkd, *cmdBuffer);
2684 	submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
2685 
2686 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2687 	{
2688 		VK_CHECK(vkd.getQueryPoolResults(device, *queryPoolCompact, 0u, queryCount, queryCount * sizeof(VkDeviceSize), compactSizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
2689 		VK_CHECK(vkd.getQueryPoolResults(device, *queryPoolSerial, 0u, queryCount, queryCount * sizeof(VkDeviceSize), serialSizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
2690 
2691 		vkd.resetCommandPool(device, *cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
2692 	}
2693 
2694 	if (m_params->operationTarget == OT_BOTTOM_ACCELERATION)
2695 		result = performTest<BottomLevelAccelerationStructure>(*cmdPool, *cmdBuffer, bottomStructures, compactSizes, serialSizes);
2696 	else
2697 		result = performTest<TopLevelAccelerationStructure>(*cmdPool, *cmdBuffer, topStructures, compactSizes, serialSizes);
2698 
2699 	return result ? tcu::TestStatus::pass("") : tcu::TestStatus::fail("");
2700 }
2701 
2702 template<class ASType>
2703 bool RayTracingDeviceASCompabilityKHRTestInstance::performTest (VkCommandPool								cmdPool,
2704 																VkCommandBuffer								cmdBuffer,
2705 																const std::vector<de::SharedPtr<ASType>>	sourceStructures,
2706 																const std::vector<VkDeviceSize>&			compactSizes,
2707 																const std::vector<VkDeviceSize>&			serialSizes)
2708 {
2709 	const VkQueue								queue					= m_context.getUniversalQueue();
2710 	const VkDevice								device					= m_context.getDevice();
2711 	const DeviceInterface&						vkd						= m_context.getDeviceInterface();
2712 	Allocator&									allocator				= m_context.getDefaultAllocator();
2713 
2714 	const deUint32								sourceStructuresCount	= deUint32(sourceStructures.size());
2715 
2716 	Move<VkQueryPool>							queryPoolCompactSerial;
2717 	std::vector<VkDeviceSize>					compactSerialSizes;
2718 
2719 	std::vector<VkAccelerationStructureKHR>		compactHandles;
2720 	std::vector<de::SharedPtr<ASType>>			compactStructures;
2721 
2722 	std::vector<de::SharedPtr<SerialStorage>>	sourceSerialized;
2723 	std::vector<de::SharedPtr<SerialStorage>>	compactSerialized;
2724 
2725 
2726 	// make compact copy of acceleration structure
2727 	{
2728 		beginCommandBuffer(vkd, cmdBuffer, 0u);
2729 
2730 		for (size_t i = 0; i < sourceStructuresCount; ++i)
2731 		{
2732 			de::MovePtr<ASType> asCopy = makeAccelerationStructure<ASType>();
2733 			asCopy->setBuildType(m_params->buildType);
2734 			asCopy->createAndCopyFrom(vkd, device, cmdBuffer, allocator, sourceStructures[i].get(), compactSizes[i], 0u);
2735 			compactHandles.push_back(*(asCopy->getPtr()));
2736 			compactStructures.push_back(de::SharedPtr<ASType>(asCopy.release()));
2737 		}
2738 
2739 		// query serialization size of compact acceleration structures
2740 		if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2741 			queryPoolCompactSerial = makeQueryPool(vkd, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, sourceStructuresCount);
2742 		queryAccelerationStructureSize(vkd, device, cmdBuffer, compactHandles, m_params->buildType, *queryPoolCompactSerial, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, compactSerialSizes);
2743 
2744 		endCommandBuffer(vkd, cmdBuffer);
2745 		submitCommandsAndWait(vkd, device, queue, cmdBuffer);
2746 
2747 		if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2748 		{
2749 			VK_CHECK(vkd.getQueryPoolResults(device, *queryPoolCompactSerial, 0u, sourceStructuresCount, (sourceStructuresCount * sizeof(VkDeviceSize)), compactSerialSizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
2750 			vkd.resetCommandPool(device, cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
2751 		}
2752 	}
2753 
2754 	// serialize both structures to memory
2755 	{
2756 		beginCommandBuffer(vkd, cmdBuffer, 0u);
2757 
2758 		for (size_t i = 0 ; i < sourceStructuresCount; ++i)
2759 		{
2760 			sourceSerialized.push_back(de::SharedPtr<SerialStorage>(new SerialStorage(vkd, device, allocator, m_params->buildType, serialSizes[i])));
2761 			sourceStructures[i]->serialize(vkd, device, cmdBuffer, sourceSerialized.back().get());
2762 
2763 			compactSerialized.push_back(de::SharedPtr<SerialStorage>(new SerialStorage(vkd, device, allocator, m_params->buildType, compactSerialSizes[i])));
2764 			compactStructures[i]->serialize(vkd, device, cmdBuffer, compactSerialized.back().get());
2765 		}
2766 
2767 		endCommandBuffer(vkd, cmdBuffer);
2768 		submitCommandsAndWait(vkd, device, queue, cmdBuffer);
2769 	}
2770 
2771 	// verify compatibility
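	// Both the source and the compacted structure were serialized on the same device, so
	// vkGetDeviceAccelerationStructureCompatibilityKHR must report COMPATIBLE for both headers.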
2772 	bool result = true;
2773 	for (size_t i = 0; result && (i < sourceStructuresCount); ++i)
2774 	{
2775 		const deUint8* s_header = static_cast<const deUint8*>(sourceSerialized[i]->getHostAddressConst().hostAddress);
2776 		const deUint8* c_header = static_cast<const deUint8*>(compactSerialized[i]->getHostAddressConst().hostAddress);
2777 
2778 		const auto s_compability = getDeviceASCompatibilityKHR(s_header);
2779 		const auto c_compability = getDeviceASCompatibilityKHR(c_header);
2780 
2781 		result &= ((s_compability == c_compability) && (s_compability == VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR));
2782 
2783 		if (!result)
2784 		{
2785 			tcu::TestLog& log = m_context.getTestContext().getLog();
2786 
2787 			log << tcu::TestLog::Message << getUUIDsString(s_header) << " serialized AS compatibility check failed" << tcu::TestLog::EndMessage;
2788 			log << tcu::TestLog::Message << getUUIDsString(c_header) << " compact AS compatibility check failed" << tcu::TestLog::EndMessage;
2789 		}
2790 	}
2791 
2792 	return result;
2793 }
2794 
2795 de::SharedPtr<TopLevelAccelerationStructure>
2796 RayTracingHeaderBottomAddressTestInstance::prepareTopAccelerationStructure (const DeviceInterface&	vk,
2797 																			VkDevice				device,
2798 																			Allocator&				allocator,
2799 																			VkCommandBuffer			cmdBuffer)
2800 {
2801 	const std::vector<tcu::Vec3>									geometryData =
2802 	{
2803 		{ 0.0, 0.0, 0.0 },
2804 		{ 1.0, 0.0, 0.0 },
2805 		{ 0.0, 1.0, 0.0 },
2806 	};
2807 
2808 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	bottoms;
2809 
2810 	if (TTT_IDENTICAL_INSTANCES == m_params->topTestType)
2811 	{
2812 		auto blas = de::SharedPtr<BottomLevelAccelerationStructure>(makeBottomLevelAccelerationStructure().release());
2813 		blas->setBuildType(m_params->buildType);
2814 		blas->setGeometryData(geometryData, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
2815 		blas->createAndBuild(vk, device, cmdBuffer, allocator);
2816 		for (deUint32 i = 0; i < m_params->width; ++i)
2817 		{
2818 			bottoms.emplace_back(blas);
2819 		}
2820 	}
2821 	else if (TTT_DIFFERENT_INSTANCES == m_params->topTestType)
2822 	{
2823 		for (deUint32 i = 0; i < m_params->width; ++i)
2824 		{
2825 			auto blas = de::SharedPtr<BottomLevelAccelerationStructure>(makeBottomLevelAccelerationStructure().release());
2826 			blas->setBuildType(m_params->buildType);
2827 			blas->setGeometryData(geometryData, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
2828 			blas->createAndBuild(vk, device, cmdBuffer, allocator);
2829 			bottoms.emplace_back(blas);
2830 		}
2831 	}
2832 	else // TTT_MIX_INSTANCES == m_params->topTestType
2833 	{
2834 		for (deUint32 i = 0; i < m_params->width; ++i)
2835 		{
2836 			{
2837 				auto blas1 = de::SharedPtr<BottomLevelAccelerationStructure>(makeBottomLevelAccelerationStructure().release());
2838 				blas1->setBuildType(m_params->buildType);
2839 				blas1->setGeometryData(geometryData, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
2840 				blas1->createAndBuild(vk, device, cmdBuffer, allocator);
2841 				bottoms.emplace_back(blas1);
2842 			}
2843 
2844 			{
2845 				auto blas2 = de::SharedPtr<BottomLevelAccelerationStructure>(makeBottomLevelAccelerationStructure().release());
2846 				blas2->setBuildType(m_params->buildType);
2847 				blas2->setGeometryData(geometryData, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
2848 				blas2->createAndBuild(vk, device, cmdBuffer, allocator);
2849 				bottoms.emplace_back(blas2);
2850 			}
2851 		}
2852 
2853 	}
2854 
2855 	const std::size_t												instanceCount = bottoms.size();
2856 
2857 	de::MovePtr<TopLevelAccelerationStructure>						tlas = makeTopLevelAccelerationStructure();
2858 	tlas->setBuildType(m_params->buildType);
2859 	tlas->setInstanceCount(instanceCount);
2860 
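	// Translate each instance by (i, i, 0) so the instances are spread along a diagonal
	// instead of overlapping at the origin.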
2861 	for (std::size_t i = 0; i < instanceCount; ++i)
2862 	{
2863 		const VkTransformMatrixKHR	transformMatrixKHR =
2864 		{
2865 			{	//  float	matrix[3][4];
2866 				{ 1.0f, 0.0f, 0.0f, (float)i },
2867 				{ 0.0f, 1.0f, 0.0f, (float)i },
2868 				{ 0.0f, 0.0f, 1.0f, 0.0f },
2869 			}
2870 		};
2871 		tlas->addInstance(bottoms[i], transformMatrixKHR, 0, m_params->cullMask, 0u, getCullFlags((m_params->cullFlags)));
2872 	}
2873 
2874 	tlas->createAndBuild(vk, device, cmdBuffer, allocator);
2875 
2876 	return de::SharedPtr<TopLevelAccelerationStructure>(tlas.release());
2877 }
2878 
2879 tcu::TestStatus RayTracingHeaderBottomAddressTestInstance::iterate (void)
2880 {
2881 	const DeviceInterface&								vkd				= m_context.getDeviceInterface();
2882 	const VkDevice										device			= m_context.getDevice();
2883 	const deUint32										familyIndex		= m_context.getUniversalQueueFamilyIndex();
2884 	const VkQueue										queue			= m_context.getUniversalQueue();
2885 	Allocator&											allocator		= m_context.getDefaultAllocator();
2886 
2887 	const Move<VkCommandPool>							cmdPool			= createCommandPool(vkd, device, vk::VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, familyIndex);
2888 	const Move<VkCommandBuffer>							cmdBuffer		= allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2889 
2890 	beginCommandBuffer(vkd, *cmdBuffer, 0);
2891 	de::SharedPtr<TopLevelAccelerationStructure>		src				= prepareTopAccelerationStructure(vkd, device, allocator, *cmdBuffer);
2892 	endCommandBuffer(vkd, *cmdBuffer);
2893 	submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
2894 
2895 	de::MovePtr<TopLevelAccelerationStructure>			dst				= makeTopLevelAccelerationStructure();
2896 
2897 	const std::vector<deUint64>							inAddrs			= src->getSerializingAddresses(vkd, device);
2898 	const std::vector<VkDeviceSize>						inSizes			= src->getSerializingSizes(vkd, device, queue, familyIndex);
2899 
2900 	const SerialInfo									serialInfo		(inAddrs, inSizes);
2901 	SerialStorage										deepStorage		(vkd, device, allocator, m_params->buildType, serialInfo);
2902 
2903 	// make deep serialization - the top-level AS together with the bottom-level structures it owns
2904 	vkd.resetCommandBuffer(*cmdBuffer, 0);
2905 	beginCommandBuffer(vkd, *cmdBuffer, 0);
2906 	src->serialize(vkd, device, *cmdBuffer, &deepStorage);
2907 	endCommandBuffer(vkd, *cmdBuffer);
2908 	submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
2909 
2910 	// deserialize everything from the previous step into a new top-level AS;
2911 	// the bottom-level structure addresses should be updated when the deep data is deserialized
2912 	vkd.resetCommandBuffer(*cmdBuffer, 0);
2913 	beginCommandBuffer(vkd, *cmdBuffer, 0);
2914 	dst->createAndDeserializeFrom(vkd, device, *cmdBuffer, allocator, &deepStorage);
2915 	endCommandBuffer(vkd, *cmdBuffer);
2916 	submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
2917 
2918 	SerialStorage										shallowStorage	(vkd, device, allocator, m_params->buildType, inSizes[0]);
2919 
2920 	// make shallow serialization - only top-level AS without bottom-level structures
2921 	vkd.resetCommandBuffer(*cmdBuffer, 0);
2922 	beginCommandBuffer(vkd, *cmdBuffer, 0);
2923 	dst->serialize(vkd, device, *cmdBuffer, &shallowStorage);
2924 	endCommandBuffer(vkd, *cmdBuffer);
2925 	submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
2926 
2927 	// get data for verification
2928 	const std::vector<deUint64>							outAddrs		= dst->getSerializingAddresses(vkd, device);
2929 	const SerialStorage::AccelerationStructureHeader*	header			= shallowStorage.getASHeader();
2930 
2931 	return (areAddressesDifferent(inAddrs, outAddrs) && areAddressesTheSame(outAddrs, header)) ? tcu::TestStatus::pass("") : tcu::TestStatus::fail("");
2932 }
2933 
2934 bool RayTracingHeaderBottomAddressTestInstance::areAddressesTheSame (const std::vector<deUint64>& addresses, const SerialStorage::AccelerationStructureHeader* header)
2935 {
2936 	const deUint32 cbottoms = deUint32(addresses.size() - 1);
2937 
2938 	// header should contain the same number of handles as serialized/deserialized top-level AS
2939 	if (cbottoms != header->handleCount) return false;
2940 
2941 	std::set<deUint64> refAddrs;
2942 	std::set<deUint64> checkAddrs;
2943 
2944 	// deduplicate and sort both address lists
2945 	for (deUint32 i = 0; i < cbottoms; ++i)
2946 	{
2947 		refAddrs.insert(addresses[i+1]);
2948 		checkAddrs.insert(header->handleArray[i]);
2949 	}
2950 
2951 	return std::equal(refAddrs.begin(), refAddrs.end(), checkAddrs.begin());
2952 }
2953 
2954 bool RayTracingHeaderBottomAddressTestInstance::areAddressesDifferent (const std::vector<deUint64>& addresses1, const std::vector<deUint64>& addresses2)
2955 {
2956 	// the number of addresses must be equal
2957 	if (addresses1.size() != addresses2.size())
2958 		return false;
2959 
2960 	// addresses of the top-level AS must differ
2961 	if (addresses1[0] == addresses2[0])
2962 		return false;
2963 
2964 	std::set<deUint64>	addrs1;
2965 	std::set<deUint64>	addrs2;
2966 	deUint32			matches		= 0;
2967 	const deUint32		cbottoms	= deUint32(addresses1.size() - 1);
2968 
2969 	for (deUint32 i = 0; i < cbottoms; ++i)
2970 	{
2971 		addrs1.insert(addresses1[i+1]);
2972 		addrs2.insert(addresses2[i+1]);
2973 	}
2974 
2975 	// the first address set must not contain any address from the second set
2976 	for (auto& addr1 : addrs1)
2977 	{
2978 		if (addrs2.end() != addrs2.find(addr1))
2979 			++matches;
2980 	}
2981 
2982 	return (matches == 0);
2983 }
2984 
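// Small factory helpers in the spirit of std::make_shared / std::make_unique, but producing
// de::SharedPtr / de::MovePtr respectively.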
2985 template<class X, class... Y>
2986 inline de::SharedPtr<X> makeShared(Y&&... ctorArgs) {
2987 	return de::SharedPtr<X>(new X(std::forward<Y>(ctorArgs)...));
2988 }
2989 template<class X, class... Y>
2990 inline de::MovePtr<X> makeMovePtr(Y&&... ctorArgs) {
2991 	return de::MovePtr<X>(new X(std::forward<Y>(ctorArgs)...));
2992 }
2993 template<class X>
2994 inline de::SharedPtr<X> makeSharedFrom(const X& x) {
2995 	return makeShared<X>(x);
2996 }
2997 
2998 struct QueryPoolResultsParams
2999 {
3000 	enum class Type
3001 	{
3002 		StructureSize,
3003 		PointerCount
3004 	}									queryType;
3005 	VkAccelerationStructureBuildTypeKHR	buildType;
3006 	deUint32							blasCount;
3007 	bool								inVkBuffer;
3008 	bool								compacted;
3009 };
3010 
3011 typedef de::SharedPtr<const QueryPoolResultsParams> QueryPoolResultsParamsPtr;
3012 
3013 struct ASInterface;
3014 typedef de::SharedPtr<ASInterface> ASInterfacePtr;
3015 
3016 class QueryPoolResultsInstance : public TestInstance
3017 {
3018 public:
3019 	using TlasPtr = de::SharedPtr<TopLevelAccelerationStructure>;
3020 	using BlasPtr = de::SharedPtr<BottomLevelAccelerationStructure>;
3021 
3022 				QueryPoolResultsInstance	(Context&						context,
3023 											 QueryPoolResultsParamsPtr		params)
3024 					: TestInstance	(context)
3025 					, m_params		(params) {}
3026 	auto		prepareBottomAccStructures	(const DeviceInterface&			vk,
3027 											 VkDevice						device,
3028 											 Allocator&						allocator,
3029 											 VkCommandBuffer				cmdBuffer) ->std::vector<BlasPtr>;
3030 	TlasPtr		prepareTopAccStructure		(const DeviceInterface&			vk,
3031 											 VkDevice						device,
3032 											 Allocator&						allocator,
3033 											 VkCommandBuffer				cmdBuffer,
3034 											 const std::vector<BlasPtr>&	bottoms);
3035 protected:
3036 	const QueryPoolResultsParamsPtr	m_params;
3037 };
3038 
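// Type-erasing interface over top- and bottom-level acceleration structures, so the query-pool
// tests below can query build sizes and clone either kind through a single code path.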
3039 struct ASInterface
3040 {
3041 	virtual VkAccelerationStructureKHR getPtr() const = 0;
3042 	virtual VkAccelerationStructureBuildSizesInfoKHR getStructureBuildSizes() const = 0;
3043 	virtual ASInterfacePtr clone (Context& ctx, VkAccelerationStructureBuildTypeKHR buildType, const VkCommandBuffer cmd, VkDeviceSize size) = 0;
3044 };
3045 
3046 template<class> struct ASAllocator;
3047 template<> struct ASAllocator<QueryPoolResultsInstance::TlasPtr>
3048 {
3049 	typedef QueryPoolResultsInstance::TlasPtr TlasPtr;
3050 	static TlasPtr alloc() { return TlasPtr(makeTopLevelAccelerationStructure().release()); }
3051 };
3052 template<> struct ASAllocator<QueryPoolResultsInstance::BlasPtr>
3053 {
3054 	typedef QueryPoolResultsInstance::BlasPtr BlasPtr;
3055 	static BlasPtr alloc() { return BlasPtr(makeBottomLevelAccelerationStructure().release()); }
3056 };
3057 
3058 template<class SharedPtrType> struct ASInterfaceImpl : ASInterface
3059 {
3060 	SharedPtrType	m_source;
3061 	ASInterfaceImpl (SharedPtrType src) : m_source(src) {}
3062 	virtual VkAccelerationStructureKHR getPtr() const override
3063 	{
3064 		return *m_source->getPtr();
3065 	}
3066 	virtual VkAccelerationStructureBuildSizesInfoKHR getStructureBuildSizes() const override
3067 	{
3068 		return m_source->getStructureBuildSizes();
3069 	}
3070 	virtual ASInterfacePtr clone (Context& ctx, VkAccelerationStructureBuildTypeKHR buildType, const VkCommandBuffer cmd, VkDeviceSize size) override
3071 	{
3072 		const DeviceInterface&	vk				= ctx.getDeviceInterface();
3073 		const VkDevice			device			= ctx.getDevice();
3074 		Allocator&				allocator		= ctx.getDefaultAllocator();
3075 
3076 		auto ptr = ASAllocator<SharedPtrType>::alloc();
3077 		ptr->setBuildType(buildType);
3078 		ptr->setBuildFlags(m_source->getBuildFlags());
3079 		ptr->create(vk, device, allocator, size);
3080 		ptr->copyFrom(vk, device, cmd, m_source.get(), false);
3081 		return de::SharedPtr<ASInterface>(new ASInterfaceImpl(ptr));
3082 	}
3083 };
3084 
3085 template<class SharedPtrType> ASInterfacePtr makeASInterfacePtr (SharedPtrType asPtr)
3086 {
3087 	return ASInterfacePtr(new ASInterfaceImpl<SharedPtrType>(asPtr));
3088 }
3089 
3090 class QueryPoolResultsSizeInstance : public QueryPoolResultsInstance
3091 {
3092 public:
3093 				QueryPoolResultsSizeInstance	(Context&										context,
3094 												 QueryPoolResultsParamsPtr						params)
3095 					: QueryPoolResultsInstance	(context, params) {}
3096 	TestStatus	iterate							(void) override;
3097 	auto		makeCopyOfStructures			(const std::vector<ASInterfacePtr>&				structs,
3098 												 const std::vector<VkDeviceSize>				sizes) -> std::vector<ASInterfacePtr>;
3099 	auto		getStructureSizes				(const std::vector<VkAccelerationStructureKHR>&	handles) -> std::vector<VkDeviceSize>;
3100 };
3101 
3102 class QueryPoolResultsPointersInstance : public QueryPoolResultsInstance
3103 {
3104 public:
3105 				QueryPoolResultsPointersInstance (Context& context, QueryPoolResultsParamsPtr params)
3106 					: QueryPoolResultsInstance(context, params) {}
3107 
3108 	TestStatus	iterate							  (void) override;
3109 };
3110 
3111 class QueryPoolResultsCase : public TestCase
3112 {
3113 public:
3114 					QueryPoolResultsCase	(TestContext&				ctx,
3115 											 const char*				name,
3116 											 QueryPoolResultsParamsPtr	params)
3117 						: TestCase(ctx, name, std::string())
3118 						, m_params(params) {}
3119 	void			checkSupport			(Context&					context) const override;
3120 	TestInstance*	createInstance			(Context&					context) const override;
3121 
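	// Helper that reinterprets raw host memory as an array of T and returns the [begin, end)
	// iterator pair spanning n elements, e.g.:
	//   auto range = makeStdBeginEnd<VkDeviceSize>(alloc.getHostPtr(), queryCount);
	//   std::copy_n(range.first, queryCount, results.begin());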
3122 	template<class T, class P = T(*)[1], class R = decltype(std::begin(*std::declval<P>()))>
3123 	static auto makeStdBeginEnd(void* p, deUint32 n) -> std::pair<R, R>
3124 	{
3125 		auto tmp = std::begin(*P(p));
3126 		auto begin = tmp;
3127 		std::advance(tmp, n);
3128 		return { begin, tmp };
3129 	}
3130 
3131 private:
3132 	const QueryPoolResultsParamsPtr	m_params;
3133 };
3134 
3135 TestInstance* QueryPoolResultsCase::createInstance (Context& context) const
3136 {
3137 	switch (m_params->queryType)
3138 	{
3139 		case QueryPoolResultsParams::Type::StructureSize:	return new QueryPoolResultsSizeInstance(context, m_params);
3140 		case QueryPoolResultsParams::Type::PointerCount:	return new QueryPoolResultsPointersInstance(context, m_params);
3141 	}
3142 	TCU_THROW(InternalError, "Unknown test type");
3143 	return nullptr;
3144 }
3145 
3146 void QueryPoolResultsCase::checkSupport (Context& context) const
3147 {
3148 	context.requireDeviceFunctionality(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME);
3149 	context.requireDeviceFunctionality(VK_KHR_RAY_TRACING_MAINTENANCE_1_EXTENSION_NAME);
3150 
3151 	const VkPhysicalDeviceAccelerationStructureFeaturesKHR&	accelerationStructureFeaturesKHR = context.getAccelerationStructureFeatures();
3152 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR && accelerationStructureFeaturesKHR.accelerationStructureHostCommands == DE_FALSE)
3153 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructureHostCommands");
3154 
3155 	const VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR& maintenance1FeaturesKHR = context.getRayTracingMaintenance1Features();
3156 	if (maintenance1FeaturesKHR.rayTracingMaintenance1 == VK_FALSE)
3157 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR::rayTracingMaintenance1");
3158 }
3159 
3160 auto QueryPoolResultsInstance::prepareBottomAccStructures (const DeviceInterface&	vk,
3161 														   VkDevice					device,
3162 														   Allocator&				allocator,
3163 														   VkCommandBuffer			cmdBuffer) -> std::vector<BlasPtr>
3164 {
3165 	std::vector<Vec3>		triangle		=
3166 	{
3167 		{ 0.0, 0.0, 0.0 },
3168 		{ 0.5, 0.0, 0.0 },
3169 		{ 0.0, 0.5, 0.0 },
3170 	};
3171 
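	// triangleCount is the triangular number blasCount * (blasCount + 1) / 2, which equals the total
	// number of rotation steps performed below; with an angle of 2*pi / triangleCount per step
	// (note 4 * acos(0) == 2*pi), the triangle sweeps exactly one full revolution across all BLASes.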
3172 	const deUint32			triangleCount	= ((1 + m_params->blasCount) * m_params->blasCount) / 2;
3173 	const float				angle			= (4.0f * std::acos(0.0f)) / float(triangleCount);
3174 	auto					rotateCcwZ		= [&](const Vec3& p, const Vec3& center) -> tcu::Vec3
3175 	{
3176 		const float s = std::sin(angle);
3177 		const float c = std::cos(angle);
3178 		const auto  t = p - center;
3179 		return tcu::Vec3(c * t.x() - s * t.y(), s * t.x() + c * t.y(), t.z()) + center;
3180 	};
3181 	auto					nextGeometry	= [&]() -> void
3182 	{
3183 		for (auto& vertex : triangle)
3184 			vertex = rotateCcwZ(vertex, Vec3(0.0f, 0.0f, 0.0f));
3185 	};
3186 
3187 	std::vector<BlasPtr>	bottoms			(m_params->blasCount);
3188 
3189 	for (deUint32 b = 0; b < m_params->blasCount; ++b)
3190 	{
3191 		BlasPtr blas(makeBottomLevelAccelerationStructure().release());
3192 
3193 		blas->setBuildType(m_params->buildType);
3194 		if (m_params->compacted)
3195 		{
3196 			blas->setBuildFlags(VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR);
3197 		}
3198 		blas->addGeometry(triangle, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
3199 		for (deUint32 geom = b; geom < m_params->blasCount; ++geom)
3200 		{
3201 			nextGeometry();
3202 			blas->addGeometry(triangle, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
3203 		}
3204 
3205 		blas->createAndBuild(vk, device, cmdBuffer, allocator);
3206 
3207 		bottoms[b] = blas;
3208 	}
3209 
3210 	return bottoms;
3211 }
3212 
3213 auto QueryPoolResultsInstance::prepareTopAccStructure (const DeviceInterface&		vk,
3214 													   VkDevice						device,
3215 													   Allocator&					allocator,
3216 													   VkCommandBuffer				cmdBuffer,
3217 													   const std::vector<BlasPtr>&	bottoms) -> TlasPtr
3218 {
3219 	const std::size_t	instanceCount = bottoms.size();
3220 
3221 	de::MovePtr<TopLevelAccelerationStructure>	tlas = makeTopLevelAccelerationStructure();
3222 	tlas->setBuildType(m_params->buildType);
3223 	if (m_params->compacted)
3224 	{
3225 		tlas->setBuildFlags(VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR);
3226 	}
3227 	tlas->setInstanceCount(instanceCount);
3228 
3229 	for (std::size_t i = 0; i < instanceCount; ++i)
3230 	{
3231 		tlas->addInstance(bottoms[i], identityMatrix3x4, 0, 0xFFu, 0u, VkGeometryInstanceFlagsKHR(0));
3232 	}
3233 
3234 	tlas->createAndBuild(vk, device, cmdBuffer, allocator);
3235 
3236 	return TlasPtr(tlas.release());
3237 }
3238 
3239 auto QueryPoolResultsSizeInstance::getStructureSizes (const std::vector<VkAccelerationStructureKHR>&	handles) -> std::vector<VkDeviceSize>
3240 {
3241 	const DeviceInterface&				vk				= m_context.getDeviceInterface();
3242 	const VkDevice						device			= m_context.getDevice();
3243 	const deUint32						familyIndex		= m_context.getUniversalQueueFamilyIndex();
3244 	const VkQueue						queue			= m_context.getUniversalQueue();
3245 	Allocator&							allocator		= m_context.getDefaultAllocator();
3246 
3247 	const Move<VkCommandPool>			cmdPool			= createCommandPool(vk, device, 0, familyIndex);
3248 	const Move<VkCommandBuffer>			cmdBuffer		= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3249 
3250 	const deUint32						queryCount		= static_cast<deUint32>(handles.size());
3251 
3252 	Move<VkQueryPool>					queryPoolSize	= makeQueryPool(vk, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR, queryCount);
3253 	Move<VkQueryPool>					queryPoolSerial	= makeQueryPool(vk, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, queryCount);
3254 	Move<VkQueryPool>					queryPoolCompact= m_params->compacted
3255 											? makeQueryPool(vk, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, queryCount)
3256 											: Move<VkQueryPool>();
3257 
3258 	de::MovePtr<BufferWithMemory>		buffer;
3259 	std::vector<VkDeviceSize>			sizeSizes		(queryCount, 0);
3260 	std::vector<VkDeviceSize>			serialSizes		(queryCount, 0);
3261 	std::vector<VkDeviceSize>			compactSizes	(queryCount, 0);
3262 
3263 	if (m_params->inVkBuffer)
3264 	{
3265 		const auto vci = makeBufferCreateInfo((m_params->compacted ? 3 : 2) * queryCount * sizeof(VkDeviceSize), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3266 		buffer = makeMovePtr<BufferWithMemory>(vk, device, allocator, vci, MemoryRequirement::Coherent | MemoryRequirement::HostVisible);
3267 	}
3268 
3269 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3270 	{
3271 		beginCommandBuffer(vk, *cmdBuffer, 0);
3272 
3273 		vk.cmdResetQueryPool(*cmdBuffer, *queryPoolSize, 0, queryCount);
3274 		vk.cmdResetQueryPool(*cmdBuffer, *queryPoolSerial, 0, queryCount);
3275 		if (m_params->compacted)
3276 		{
3277 			vk.cmdResetQueryPool(*cmdBuffer, *queryPoolCompact, 0, queryCount);
3278 		}
3279 
3280 		vk.cmdWriteAccelerationStructuresPropertiesKHR(*cmdBuffer, queryCount, handles.data(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR, *queryPoolSize, 0);
3281 		vk.cmdWriteAccelerationStructuresPropertiesKHR(*cmdBuffer, queryCount, handles.data(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, *queryPoolSerial, 0);
3282 
3283 		if (m_params->compacted)
3284 		{
3285 			vk.cmdWriteAccelerationStructuresPropertiesKHR(*cmdBuffer, queryCount, handles.data(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, *queryPoolCompact, 0);
3286 		}
3287 
3288 		if (m_params->inVkBuffer)
3289 		{
3290 			vk.cmdCopyQueryPoolResults(*cmdBuffer, *queryPoolSize, 0, queryCount, **buffer, (0 * queryCount * sizeof(VkDeviceSize)),
3291 									   sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
3292 			vk.cmdCopyQueryPoolResults(*cmdBuffer, *queryPoolSerial, 0, queryCount, **buffer, (1 * queryCount * sizeof(VkDeviceSize)),
3293 									   sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
3294 			if (m_params->compacted)
3295 			{
3296 				vk.cmdCopyQueryPoolResults(*cmdBuffer, *queryPoolCompact, 0, queryCount, **buffer, (2 * queryCount * sizeof(VkDeviceSize)),
3297 											sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
3298 			}
3299 		}
3300 		endCommandBuffer(vk, *cmdBuffer);
3301 		submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3302 
3303 		if (m_params->inVkBuffer)
3304 		{
3305 			Allocation&	alloc		= buffer->getAllocation();
3306 			invalidateMappedMemoryRange(vk, device, alloc.getMemory(), alloc.getOffset(), VK_WHOLE_SIZE);
3307 
3308 			deUint8*	ptrSize		= reinterpret_cast<deUint8*>(alloc.getHostPtr());
3309 			deUint8*	ptrSerial	= ptrSize + queryCount * sizeof(VkDeviceSize);
3310 
3311 			auto		rangeSize	= QueryPoolResultsCase::makeStdBeginEnd<VkDeviceSize>(ptrSize, queryCount);
3312 			auto		rangeSerial	= QueryPoolResultsCase::makeStdBeginEnd<VkDeviceSize>(ptrSerial, queryCount);
3313 
3314 			std::copy_n(rangeSize.first, queryCount, sizeSizes.begin());
3315 			std::copy_n(rangeSerial.first, queryCount, serialSizes.begin());
3316 
3317 			if (m_params->compacted)
3318 			{
3319 				auto	ptrCompact	= ptrSize + 2 * queryCount * sizeof(VkDeviceSize);
3320 				auto	rangeCompact= QueryPoolResultsCase::makeStdBeginEnd<VkDeviceSize>(ptrCompact, queryCount);
3321 				std::copy_n(rangeCompact.first, queryCount, compactSizes.begin());
3322 			}
3323 		}
3324 		else
3325 		{
3326 			VK_CHECK(vk.getQueryPoolResults(device, *queryPoolSize, 0u, queryCount, queryCount * sizeof(VkDeviceSize),
3327 											sizeSizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
3328 			VK_CHECK(vk.getQueryPoolResults(device, *queryPoolSerial, 0u, queryCount, queryCount * sizeof(VkDeviceSize),
3329 											serialSizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
3330 			if (m_params->compacted)
3331 			{
3332 				VK_CHECK(vk.getQueryPoolResults(device, *queryPoolCompact, 0u, queryCount, queryCount * sizeof(VkDeviceSize),
3333 												compactSizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
3334 			}
3335 		}
3336 	}
3337 	else
3338 	{
3339 		vk.writeAccelerationStructuresPropertiesKHR(device, queryCount, handles.data(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR,
3340 													queryCount * sizeof(VkDeviceSize), sizeSizes.data(), sizeof(VkDeviceSize));
3341 		vk.writeAccelerationStructuresPropertiesKHR(device, queryCount, handles.data(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR,
3342 													queryCount * sizeof(VkDeviceSize), serialSizes.data(), sizeof(VkDeviceSize));
3343 		if (m_params->compacted)
3344 		{
3345 			vk.writeAccelerationStructuresPropertiesKHR(device, queryCount, handles.data(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR,
3346 													queryCount * sizeof(VkDeviceSize), compactSizes.data(), sizeof(VkDeviceSize));
3347 		}
3348 	}
3349 
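	// Pack the three result arrays into a single vector: [0, queryCount) holds the structure sizes,
	// [queryCount, 2*queryCount) the serialization sizes, and [2*queryCount, 3*queryCount) the
	// compacted sizes (left at zero when compaction is not exercised).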
3350 	sizeSizes.insert(sizeSizes.end(), serialSizes.begin(), serialSizes.end());
3351 	sizeSizes.insert(sizeSizes.end(), compactSizes.begin(), compactSizes.end());
3352 
3353 	return sizeSizes;
3354 }
3355 
3356 auto QueryPoolResultsSizeInstance::makeCopyOfStructures (const std::vector<ASInterfacePtr>&	structs,
3357 														 const std::vector<VkDeviceSize>	sizes) -> std::vector<ASInterfacePtr>
3358 {
3359 	const DeviceInterface&				vk				= m_context.getDeviceInterface();
3360 	const VkDevice						device			= m_context.getDevice();
3361 	const VkQueue						queue			= m_context.getUniversalQueue();
3362 
3363 	Move<VkCommandPool>					cmdPool;
3364 	Move<VkCommandBuffer>				cmdBuffer;
3365 
3366 	std::vector<ASInterfacePtr>			copies;
3367 
3368 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3369 	{
3370 		const deUint32	familyIndex	= m_context.getUniversalQueueFamilyIndex();
3371 						cmdPool		= createCommandPool(vk, device, 0, familyIndex);
3372 						cmdBuffer	= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3373 		beginCommandBuffer(vk, *cmdBuffer, 0u);
3374 	}
3375 
3376 	for (auto begin = structs.begin(), i = begin; i != structs.end(); ++i)
3377 	{
3378 		copies.push_back((*i)->clone(m_context, m_params->buildType, *cmdBuffer, sizes.at(std::distance(begin, i))));
3379 	}
3380 
3381 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3382 	{
3383 		endCommandBuffer(vk, *cmdBuffer);
3384 		submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3385 	}
3386 
3387 	return copies;
3388 }
3389 
3390 TestStatus QueryPoolResultsSizeInstance::iterate (void)
3391 {
3392 	const DeviceInterface&								vk				= m_context.getDeviceInterface();
3393 	const VkDevice										device			= m_context.getDevice();
3394 	const deUint32										familyIndex		= m_context.getUniversalQueueFamilyIndex();
3395 	const VkQueue										queue			= m_context.getUniversalQueue();
3396 	Allocator&											allocator		= m_context.getDefaultAllocator();
3397 
3398 	const Move<VkCommandPool>							cmdPool			= createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, familyIndex);
3399 	const Move<VkCommandBuffer>							cmdBuffer		= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3400 
3401 	beginCommandBuffer(vk, *cmdBuffer, 0);
3402 	const std::vector<BlasPtr>							bottoms			= prepareBottomAccStructures(vk, device, allocator, *cmdBuffer);
3403 	TlasPtr												tlas			= prepareTopAccStructure(vk, device, allocator, *cmdBuffer, bottoms);
3404 	endCommandBuffer(vk, *cmdBuffer);
3405 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3406 
3407 	const deUint32										queryCount		= m_params->blasCount + 1;
3408 	std::vector<VkAccelerationStructureKHR>				handles			(queryCount);
3409 	handles[0] = *tlas->getPtr();
3410 	std::transform(bottoms.begin(), bottoms.end(), std::next(handles.begin()), [](const BlasPtr& blas){ return *blas->getPtr(); });
3411 
3412 	// only the first queryCount elements are results from ACCELERATION_STRUCTURE_SIZE queries.
3413 	const std::vector<VkDeviceSize>						sourceSizes		= getStructureSizes(handles);
3414 
3415 	std::vector<ASInterfacePtr>							sourceStructures;
3416 	sourceStructures.push_back(makeASInterfacePtr(tlas));
3417 	for (BlasPtr blas : bottoms) sourceStructures.push_back(makeASInterfacePtr(blas));
3418 
3419 	std::vector<ASInterfacePtr>							copies = makeCopyOfStructures(sourceStructures, sourceSizes);
3420 	std::transform(copies.begin(), copies.end(), handles.begin(), [](const ASInterfacePtr& intf) { return intf->getPtr(); });
3421 
3422 	const std::vector<VkDeviceSize>						copySizes = getStructureSizes(handles);
3423 
3424 	// verification
3425 	bool pass = true;
3426 	for (deUint32 i = 0; pass && i < queryCount; ++i)
3427 	{
3428 		pass = sourceSizes.at(i) == copySizes.at(i);
3429 	}
3430 
3431 	return pass ? TestStatus::pass("") : TestStatus::fail("");
3432 }
3433 
3434 TestStatus QueryPoolResultsPointersInstance::iterate (void)
3435 {
3436 	const DeviceInterface&								vk				= m_context.getDeviceInterface();
3437 	const VkDevice										device			= m_context.getDevice();
3438 	const deUint32										familyIndex		= m_context.getUniversalQueueFamilyIndex();
3439 	const VkQueue										queue			= m_context.getUniversalQueue();
3440 	Allocator&											allocator		= m_context.getDefaultAllocator();
3441 
3442 	const Move<VkCommandPool>							cmdPool			= createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, familyIndex);
3443 	const Move<VkCommandBuffer>							cmdBuffer		= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3444 
3445 	beginCommandBuffer(vk, *cmdBuffer, 0);
3446 	const std::vector<BlasPtr>							bottoms			= prepareBottomAccStructures(vk, device, allocator, *cmdBuffer);
3447 	TlasPtr												tlas			= prepareTopAccStructure(vk, device, allocator, *cmdBuffer, bottoms);
3448 	endCommandBuffer(vk, *cmdBuffer);
3449 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3450 
3451 	const deUint32										queryCount		= m_params->blasCount + 1;
3452 	std::vector<VkAccelerationStructureKHR>				handles			(queryCount);
3453 	handles[0] = *tlas.get()->getPtr();
3454 	std::transform(bottoms.begin(), bottoms.end(), std::next(handles.begin()), [](const BlasPtr& blas){ return *blas.get()->getPtr(); });
3455 
3456 	const VkQueryType									queryType		= VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR;
3457 	Move<VkQueryPool>									queryPoolCounts	= makeQueryPool(vk, device, queryType, queryCount);
3458 
3459 	de::MovePtr<BufferWithMemory>						buffer;
3460 	std::vector<VkDeviceSize>							pointerCounts	(queryCount, 123u);
3461 
3462 	if (m_params->inVkBuffer)
3463 	{
3464 		const auto vci = makeBufferCreateInfo(queryCount * sizeof(VkDeviceSize), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3465 		buffer = makeMovePtr<BufferWithMemory>(vk, device, allocator, vci, MemoryRequirement::Coherent | MemoryRequirement::HostVisible);
3466 	}
3467 
3468 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3469 	{
3470 		beginCommandBuffer(vk, *cmdBuffer, 0);
3471 		vk.cmdResetQueryPool(*cmdBuffer, *queryPoolCounts, 0, queryCount);
3472 		vk.cmdWriteAccelerationStructuresPropertiesKHR(*cmdBuffer, queryCount, handles.data(), queryType, *queryPoolCounts, 0);
3473 		if (m_params->inVkBuffer)
3474 		{
3475 			vk.cmdCopyQueryPoolResults(*cmdBuffer, *queryPoolCounts, 0, queryCount, **buffer, 0 /*offset*/,
3476 									   sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
3477 		}
3478 		endCommandBuffer(vk, *cmdBuffer);
3479 		submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3480 
3481 		if (m_params->inVkBuffer)
3482 		{
3483 			Allocation&	alloc		= buffer->getAllocation();
3484 			invalidateMappedMemoryRange(vk, device, alloc.getMemory(), alloc.getOffset(), VK_WHOLE_SIZE);
3485 			auto		rangeCounts	= QueryPoolResultsCase::makeStdBeginEnd<VkDeviceSize>(alloc.getHostPtr(), queryCount);
3486 			std::copy_n(rangeCounts.first, queryCount, pointerCounts.begin());
3487 		}
3488 		else
3489 		{
3490 			VK_CHECK(vk.getQueryPoolResults(device, *queryPoolCounts, 0u, queryCount, queryCount * sizeof(VkDeviceSize),
3491 											pointerCounts.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
3492 		}
3493 	}
3494 	else
3495 	{
3496 		vk.writeAccelerationStructuresPropertiesKHR(device, queryCount, handles.data(), queryType,
3497 													queryCount * sizeof(VkDeviceSize), pointerCounts.data(), sizeof(VkDeviceSize));
3498 	}
3499 
3500 	// Verification: the TLAS pointer count must match the handleCount in the serialized header, and each BLAS must report zero pointers.
3501 	const std::vector<VkDeviceSize>						inSizes			= tlas->getSerializingSizes(vk, device, queue, familyIndex);
3502 	SerialStorage										storage			(vk, device, allocator, m_params->buildType, inSizes[0]);
3503 
3504 	beginCommandBuffer(vk, *cmdBuffer, 0);
3505 	tlas->serialize(vk, device, *cmdBuffer, &storage);
3506 	endCommandBuffer(vk, *cmdBuffer);
3507 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3508 
3509 	const SerialStorage::AccelerationStructureHeader*	header			= storage.getASHeader();
3510 
3511 	bool pass = (header->handleCount == pointerCounts[0]); // must be the same as bottoms.size()
3512 	for (deUint32 i = 1; pass && i < queryCount; ++i)
3513 	{
3514 		pass = (0 == pointerCounts[i]); // bottom-level structures have no children
3515 	}
3516 
3517 	return pass ? TestStatus::pass("") : TestStatus::fail("");
3518 }
3519 
3520 
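// Parameters for the "copy within pipeline" tests below. Type selects which of the
// VK_KHR_ray_tracing_maintenance1 synchronization additions is exercised: a BLAS copy guarded by
// VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR or VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT_KHR,
// or an SBT copy guarded by VK_ACCESS_2_SHADER_BINDING_TABLE_READ_BIT_KHR.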
3521 struct CopyWithinPipelineParams
3522 {
3523 	enum class Type
3524 	{
3525 		StageASCopyBit,
3526 		StageAllTransferBit,
3527 		AccessSBTReadBit
3528 	}									type;
3529 	deUint32							width;
3530 	deUint32							height;
3531 	VkAccelerationStructureBuildTypeKHR	build;
3532 };
3533 typedef de::SharedPtr<const CopyWithinPipelineParams> CopyWithinPipelineParamsPtr;
3534 
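// Common fixture for the pipeline-copy tests: owns the device interface, allocator and the
// rgen/chit/miss shader modules shared by CopyBlasInstance and CopySBTInstance.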
3535 class CopyWithinPipelineInstance : public TestInstance
3536 {
3537 public:
3538 	using TlasPtr = de::SharedPtr<TopLevelAccelerationStructure>;
3539 	using BlasPtr = de::SharedPtr<BottomLevelAccelerationStructure>;
3540 
3541 				CopyWithinPipelineInstance (Context& context, CopyWithinPipelineParamsPtr params)
3542 					: TestInstance	(context)
3543 					, vk			(context.getDeviceInterface())
3544 					, device		(context.getDevice())
3545 					, allocator		(context.getDefaultAllocator())
3546 					, rgenShader	(createShaderModule(vk, device, context.getBinaryCollection().get("rgen")))
3547 					, chitShader	(createShaderModule(vk, device, context.getBinaryCollection().get("chit")))
3548 					, missShader	(createShaderModule(vk, device, context.getBinaryCollection().get("miss")))
3549 					, m_params		(params)
3550 					, m_format		(VK_FORMAT_R32G32B32A32_SFLOAT) {}
3551 protected:
3552 	const DeviceInterface&		vk;
3553 	const VkDevice				device;
3554 	Allocator&					allocator;
3555 	Move<VkShaderModule>		rgenShader;
3556 	Move<VkShaderModule>		chitShader;
3557 	Move<VkShaderModule>		missShader;
3558 	CopyWithinPipelineParamsPtr	m_params;
3559 	VkFormat					m_format;
3560 };
3561 
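// Copies a built BLAS into a freshly created one inside the same command buffer, synchronizing the
// copy against the subsequent TLAS build with the source stage selected by the test parameters, and
// compares the traced image against a reference rendered from the original BLAS (see getRefImage).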
3562 class CopyBlasInstance : public CopyWithinPipelineInstance
3563 {
3564 public:
3565 				CopyBlasInstance	(Context& context, CopyWithinPipelineParamsPtr params)
3566 					: CopyWithinPipelineInstance(context, params) {}
3567 	TestStatus	iterate				(void) override;
3568 	auto		getRefImage			(BlasPtr blas) const -> de::MovePtr<BufferWithMemory>;
3569 
3570 };
3571 
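// Traces once with the raygen SBT created by the pipeline, then copies that SBT with
// vkCmdCopyBuffer into a second buffer and traces again through the copy, synchronizing the trace
// with VK_ACCESS_2_SHADER_BINDING_TABLE_READ_BIT_KHR; both traced images must match.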
3572 class CopySBTInstance : public CopyWithinPipelineInstance
3573 {
3574 public:
3575 				CopySBTInstance		(Context&			context,
3576 									 CopyWithinPipelineParamsPtr params)
3577 					: CopyWithinPipelineInstance(context, params) {}
3578 	TestStatus	iterate			(void) override;
3579 	auto		getBufferSizeForSBT	(const deUint32&	groupCount,
3580 									 const deUint32&	shaderGroupHandleSize,
3581 									 const deUint32&	shaderGroupBaseAlignment) const -> VkDeviceSize;
3582 	auto		getBufferForSBT		(const deUint32&	groupCount,
3583 									 const deUint32&	shaderGroupHandleSize,
3584 									 const deUint32&	shaderGroupBaseAlignment) const -> de::MovePtr<BufferWithMemory>;
3585 };
3586 
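// Test case wrapper that checks the required extensions and features and instantiates either
// CopyBlasInstance or CopySBTInstance depending on the parameter type.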
3587 class PipelineStageASCase : public TestCase
3588 {
3589 public:
3590 					PipelineStageASCase	(TestContext&			ctx,
3591 										 const char*			name,
3592 										 CopyWithinPipelineParamsPtr	params)
3593 						: TestCase	(ctx, name, std::string())
3594 						, m_params	(params) {}
3595 	void			initPrograms	(SourceCollections&		programs) const override;
3596 	void			checkSupport	(Context&				context) const override;
3597 	TestInstance*	createInstance	(Context&				context) const override;
3598 
3599 private:
3600 	CopyWithinPipelineParamsPtr	m_params;
3601 };
3602 
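// The helpers in namespace u build a VkDependencyInfoKHR from up to three barriers of different
// types passed in any order. details::Sel maps each of VkMemoryBarrier2KHR,
// VkBufferMemoryBarrier2KHR and VkImageMemoryBarrier2KHR to the argument position it was passed in
// (or to the trailing std::false_type slot when absent), and details::BarrierMaker turns that
// argument into a (count, pointer) pair. For example,
// Sel<VkImageMemoryBarrier2KHR, VkMemoryBarrier2KHR, std::false_type, VkMemoryBarrier2KHR>::index::value
// is 1, because the memory barrier was passed as the second argument.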
3603 namespace u
3604 {
3605 namespace details
3606 {
3607 template<class X, class Y> struct BarrierMaker {
3608 	const X& m_x;
3609 	BarrierMaker (const X& x) : m_x(x) {}
3610 	uint32_t count () const { return 1; }
3611 	const X* pointer () const { return &m_x; }
3612 };
3613 template<class Y> struct BarrierMaker<std::false_type, Y> {
3614 	BarrierMaker (const std::false_type&) {}
3615 	uint32_t count () const { return 0; }
3616 	Y* pointer () const { return nullptr; }
3617 };
3618 template<class Z, uint32_t N> struct BarrierMaker<const Z[N], Z> {
3619 	const Z (&m_a)[N];
3620 	BarrierMaker (const Z (&a)[N]) : m_a(a) {}
3621 	uint32_t count () const { return N; }
3622 	const Z* pointer () const { return m_a; }
3623 };
3624 template<class Mem, class Buf, class Img, class Exp>
3625 struct Sel {
3626 	typedef typename std::remove_cv<Mem>::type	t_Mem;
3627 	typedef typename std::remove_cv<Buf>::type	t_Buf;
3628 	typedef typename std::remove_cv<Img>::type	t_Img;
3629 	typedef std::integral_constant<uint32_t, 0> index0;
3630 	typedef std::integral_constant<uint32_t, 1> index1;
3631 	typedef std::integral_constant<uint32_t, 2> index2;
3632 	typedef std::integral_constant<uint32_t, 3> index3;
3633 	using isMem = std::is_same<t_Mem, Exp>;
3634 	using isBuf = std::is_same<t_Buf, Exp>;
3635 	using isImg = std::is_same<t_Img, Exp>;
3636 	template<bool B, class T, class F> using choose = typename std::conditional<B,T,F>::type;
3637 	typedef choose<isMem::value, BarrierMaker<Mem, Exp>,
3638 			choose<isBuf::value, BarrierMaker<Buf, Exp>,
3639 			choose<isImg::value, BarrierMaker<Img, Exp>,
3640 								 BarrierMaker<std::false_type, Exp>>>> type;
3641 	typedef choose<isMem::value, index0,
3642 			choose<isBuf::value, index1,
3643 			choose<isImg::value, index2,
3644 								 index3>>> index;
3645 };
3646 } // details
3647 constexpr std::false_type NoneBarriers{};
3648 /**
3649  * @brief	Helper function that makes and populates a VkDependencyInfoKHR structure.
3650  * @param	barriers1 - any of VkMemoryBarrier2KHR, VkBufferMemoryBarrier2KHR or VkImageMemoryBarrier2KHR (mandatory param)
3651  * @param	barriers2 - any of VkMemoryBarrier2KHR, VkBufferMemoryBarrier2KHR or VkImageMemoryBarrier2KHR (optional param)
3652  * @param	barriers3 - any of VkMemoryBarrier2KHR, VkBufferMemoryBarrier2KHR or VkImageMemoryBarrier2KHR (optional param)
3653  * @note	The order of the parameters does not matter.
3654  */
3655 template<class Barriers1, class Barriers2 = std::false_type, class Barriers3 = std::false_type>
3656 VkDependencyInfoKHR makeDependency (const Barriers1& barriers1, const Barriers2& barriers2 = NoneBarriers, const Barriers3& barriers3 = NoneBarriers)
3657 {
3658 	auto args = std::forward_as_tuple(barriers1, barriers2, barriers3, std::false_type());
3659 	const uint32_t memIndex = details::Sel<Barriers1, Barriers2, Barriers3, VkMemoryBarrier2KHR>::index::value;
3660 	const uint32_t bufIndex = details::Sel<Barriers1, Barriers2, Barriers3, VkBufferMemoryBarrier2KHR>::index::value;
3661 	const uint32_t imgIndex = details::Sel<Barriers1, Barriers2, Barriers3, VkImageMemoryBarrier2KHR>::index::value;
3662 	typedef typename details::Sel<Barriers1, Barriers2, Barriers3, VkMemoryBarrier2KHR>::type		memType;
3663 	typedef typename details::Sel<Barriers1, Barriers2, Barriers3, VkBufferMemoryBarrier2KHR>::type	bufType;
3664 	typedef typename details::Sel<Barriers1, Barriers2, Barriers3, VkImageMemoryBarrier2KHR>::type	imgType;
3665 	return
3666 	{
3667 		VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,			// VkStructureType					sType;
3668 		nullptr,										// const void*						pNext;
3669 		VK_DEPENDENCY_BY_REGION_BIT,					// VkDependencyFlags				dependencyFlags;
3670 		memType(std::get<memIndex>(args)).count(),		// uint32_t							memoryBarrierCount;
3671 		memType(std::get<memIndex>(args)).pointer(),	// const VkMemoryBarrier2KHR*		pMemoryBarriers;
3672 		bufType(std::get<bufIndex>(args)).count(),		// uint32_t							bufferMemoryBarrierCount;
3673 		bufType(std::get<bufIndex>(args)).pointer(),	// const VkBufferMemoryBarrier2KHR*	pBufferMemoryBarriers;
3674 		imgType(std::get<imgIndex>(args)).count(),		// uint32_t							imageMemoryBarrierCount;
3675 		imgType(std::get<imgIndex>(args)).pointer()		// const VkImageMemoryBarrier2KHR*	pImageMemoryBarriers;
3676 	};
3677 }
3678 } // u
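// A minimal usage sketch of u::makeDependency (illustrative only; the barrier values below are
// placeholders, not ones used by these tests):
//
//	const VkMemoryBarrier2KHR	mem	= makeMemoryBarrier2(VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
//														 VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR, VK_ACCESS_2_SHADER_READ_BIT_KHR);
//	const VkDependencyInfoKHR	dep	= u::makeDependency(mem);	// single memory barrier
//	vk.cmdPipelineBarrier2(*cmdBuffer, &dep);
//
// Mixed barrier types may be passed in any order, e.g. u::makeDependency(imageBarrier, memBarrier);
// the counts for the omitted barrier kinds default to zero.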
3679 
3680 TestInstance* PipelineStageASCase::createInstance (Context& context) const
3681 {
3682 	de::MovePtr<TestInstance> instance;
3683 	switch (m_params->type)
3684 	{
3685 	case CopyWithinPipelineParams::Type::StageASCopyBit:
3686 	case CopyWithinPipelineParams::Type::StageAllTransferBit:
3687 		instance = makeMovePtr<CopyBlasInstance>(context, m_params);
3688 		break;
3689 	case CopyWithinPipelineParams::Type::AccessSBTReadBit:
3690 		instance = makeMovePtr<CopySBTInstance>(context, m_params);
3691 		break;
3692 	}
3693 	return instance.release();
3694 }
3695 
3696 void PipelineStageASCase::initPrograms (SourceCollections& programs) const
3697 {
3698 	const vk::ShaderBuildOptions	buildOptions	(programs.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
3699 	const char						endl			= '\n';
3700 
3701 	{
3702 		std::stringstream str;
3703 		str << "#version 460 core"																		<< endl
3704 			<< "#extension GL_EXT_ray_tracing : require"												<< endl
3705 			<< "layout(location = 0) rayPayloadEXT vec4 payload;"										<< endl
3706 			<< "layout(rgba32f, set = 0, binding = 0) uniform image2D result;"							<< endl
3707 			<< "layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;"				<< endl
3708 			<< "void main()"																			<< endl
3709 			<< "{"																						<< endl
3710 			<< "  float rx           = (float(gl_LaunchIDEXT.x) + 0.5) / float(gl_LaunchSizeEXT.x);"	<< endl
3711 			<< "  float ry           = (float(gl_LaunchIDEXT.y) + 0.5) / float(gl_LaunchSizeEXT.y);"	<< endl
3712 			<< "  payload            = vec4(0.5, 0.5, 0.5, 1.0);"										<< endl
3713 			<< "  vec3  orig         = vec3(rx, ry, 1.0);"												<< endl
3714 			<< "  vec3  dir          = vec3(0.0, 0.0, -1.0);"											<< endl
3715 			<< "  traceRayEXT(topLevelAS, gl_RayFlagsNoneEXT, 0xFFu, 0, 0, 0, orig, 0.0, dir, 2.0, 0);"	<< endl
3716 			<< "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), payload);"								<< endl
3717 			<< "}";
3718 		str.flush();
3719 		programs.glslSources.add("rgen") << glu::RaygenSource(str.str()) << buildOptions;
3720 	}
3721 
3722 	{
3723 		std::stringstream str;
3724 		str << "#version 460 core"									<< endl
3725 			<< "#extension GL_EXT_ray_tracing : require"			<< endl
3726 			<< "layout(location = 0) rayPayloadInEXT vec4 payload;"	<< endl
3727 			<< "void main()"										<< endl
3728 			<< "{"													<< endl
3729 			<< "  payload = vec4(0.0, 1.0, 0.0, 1.0);"				<< endl
3730 			<< "}";
3731 		str.flush();
3732 		programs.glslSources.add("chit") << glu::ClosestHitSource(str.str()) << buildOptions;
3733 	}
3734 
3735 	{
3736 		std::stringstream str;
3737 		str	<< "#version 460 core"									<< endl
3738 			<< "#extension GL_EXT_ray_tracing : require"			<< endl
3739 			<< "layout(location = 0) rayPayloadInEXT vec4 payload;"	<< endl
3740 			<< "void main()"										<< endl
3741 			<< "{"													<< endl
3742 			<< "  payload = vec4(1.0, 0.0, 0.0, 1.0);"				<< endl
3743 			<< "}";
3744 		str.flush();
3745 		programs.glslSources.add("miss") << glu::MissSource(str.str()) << buildOptions;
3746 	}
3747 }
3748 
3749 void PipelineStageASCase::checkSupport (Context& context) const
3750 {
3751 	context.requireInstanceFunctionality(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
3752 	context.requireDeviceFunctionality(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME);
3753 	context.requireDeviceFunctionality(VK_KHR_RAY_TRACING_MAINTENANCE_1_EXTENSION_NAME);
3754 	context.requireDeviceFunctionality(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME);
3755 	context.requireDeviceFunctionality(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
3756 
3757 	const VkPhysicalDeviceAccelerationStructureFeaturesKHR&	accelerationStructureFeaturesKHR = context.getAccelerationStructureFeatures();
3758 	if (m_params->build == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR && accelerationStructureFeaturesKHR.accelerationStructureHostCommands == DE_FALSE)
3759 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR::accelerationStructureHostCommands");
3760 
3761 	const VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR& maintenance1FeaturesKHR = context.getRayTracingMaintenance1Features();
3762 	if (maintenance1FeaturesKHR.rayTracingMaintenance1 == VK_FALSE)
3763 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR::rayTracingMaintenance1");
3764 
3765 	const VkPhysicalDeviceSynchronization2FeaturesKHR& synchronization2Features = context.getSynchronization2Features();
3766 	if (synchronization2Features.synchronization2 == VK_FALSE)
3767 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceSynchronization2FeaturesKHR::synchronization2");
3768 
3769 	if (m_params->type != CopyWithinPipelineParams::Type::AccessSBTReadBit)
3770 	{
3771 		context.requireDeviceFunctionality(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
3772 		const VkPhysicalDevicePushDescriptorPropertiesKHR&		pushDescriptorProperties = context.getPushDescriptorProperties();
3773 		if (pushDescriptorProperties.maxPushDescriptors < 32)
3774 			TCU_THROW(NotSupportedError, "Requires VkPhysicalDevicePushDescriptorPropertiesKHR::maxPushDescriptors >= 32");
3775 	}
3776 }
3777 
3778 auto CopyBlasInstance::getRefImage (BlasPtr blas) const -> de::MovePtr<BufferWithMemory>
3779 {
3780 	const deUint32							queueFamilyIndex			= m_context.getUniversalQueueFamilyIndex();
3781 	const VkQueue							queue						= m_context.getUniversalQueue();
3782 
3783 	const de::MovePtr<RayTracingProperties>	rtProps						= makeRayTracingProperties(m_context.getInstanceInterface(), m_context.getPhysicalDevice());
3784 	const deUint32							shaderGroupHandleSize		= rtProps->getShaderGroupHandleSize();
3785 	const deUint32							shaderGroupBaseAlignment	= rtProps->getShaderGroupBaseAlignment();
3786 
3787 	const VkImageCreateInfo					imageCreateInfo				= makeImageCreateInfo(m_params->width, m_params->height, m_format);
3788 	const VkImageSubresourceRange			imageSubresourceRange		= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
3789 	const de::MovePtr<ImageWithMemory>		image						= makeMovePtr<ImageWithMemory>(vk, device, allocator, imageCreateInfo, MemoryRequirement::Any);
3790 	const Move<VkImageView>					view						= makeImageView(vk, device, **image, VK_IMAGE_VIEW_TYPE_2D, m_format, imageSubresourceRange);
3791 
3792 	const deUint32							bufferSize					= (m_params->width * m_params->height * mapVkFormat(m_format).getPixelSize());
3793 	const VkBufferCreateInfo				bufferCreateInfo			= makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3794 	de::MovePtr<BufferWithMemory>			buffer						= makeMovePtr<BufferWithMemory>(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible);
3795 
3796 	const VkImageSubresourceLayers			imageSubresourceLayers		= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
3797 	const VkBufferImageCopy					bufferCopyImageRegion		= makeBufferImageCopy(makeExtent3D(m_params->width, m_params->height, 1u), imageSubresourceLayers);
3798 
3799 	de::MovePtr<RayTracingPipeline>			rtPipeline					= makeMovePtr<RayTracingPipeline>();
3800 	rtPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,		*rgenShader, 0);
3801 	rtPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,	*chitShader, 1);
3802 	rtPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR,			*missShader, 2);
3803 
3804 	const Move<VkDescriptorPool>			descriptorPool				= DescriptorPoolBuilder()
3805 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2)
3806 		.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, 2)
3807 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3808 	const Move<VkDescriptorSetLayout>		descriptorSetLayout			= DescriptorSetLayoutBuilder()
3809 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
3810 		.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
3811 		.build(vk, device);
3812 	const Move<VkDescriptorSet>				descriptorSet			= makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3813 
3814 	const Move<VkPipelineLayout>			pipelineLayout				= makePipelineLayout(vk, device, *descriptorSetLayout);
3815 	Move<VkPipeline>						pipeline					= rtPipeline->createPipeline(vk, device, *pipelineLayout);
3816 
3817 	de::MovePtr<BufferWithMemory>			rgenSbt						= rtPipeline->createShaderBindingTable(vk, device, *pipeline, allocator,
3818 																											   shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1);
3819 	VkStridedDeviceAddressRegionKHR			rgenRegion					= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vk, device, **rgenSbt, 0),
3820 																											shaderGroupHandleSize, shaderGroupHandleSize);
3821 	de::MovePtr<BufferWithMemory>			chitSbt						= rtPipeline->createShaderBindingTable(vk, device, *pipeline, allocator,
3822 																											   shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1);
3823 	VkStridedDeviceAddressRegionKHR			chitRegion					= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vk, device, **chitSbt, 0),
3824 																											shaderGroupHandleSize, shaderGroupHandleSize);
3825 	de::MovePtr<BufferWithMemory>			missSbt						= rtPipeline->createShaderBindingTable(vk, device, *pipeline, allocator,
3826 																											   shaderGroupHandleSize, shaderGroupBaseAlignment, 2, 1);
3827 	VkStridedDeviceAddressRegionKHR			missRegion					= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vk, device, **missSbt, 0),
3828 																											shaderGroupHandleSize, shaderGroupHandleSize);
3829 	const VkStridedDeviceAddressRegionKHR	callRegion					= makeStridedDeviceAddressRegionKHR(VkDeviceAddress(0), 0, 0);
3830 
3831 	const VkClearValue						clearValue					= { { { 0.1f, 0.2f, 0.3f, 0.4f } } };
3832 
3833 	const VkImageMemoryBarrier2KHR			preClearImageImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR, 0,
3834 																								  VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
3835 																								  VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
3836 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
3837 	const VkImageMemoryBarrier2KHR			postClearImageImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
3838 																								  VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR, VK_ACCESS_2_SHADER_READ_BIT_KHR,
3839 																								  VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL,
3840 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
3841 	const VkDependencyInfoKHR				preClearImageDependency		= u::makeDependency(preClearImageImageBarrier);
3842 	const VkDependencyInfoKHR				postClearImageDependency	= u::makeDependency(postClearImageImageBarrier);
3843 
3844 
3845 	const VkImageMemoryBarrier2KHR			postTraceRaysImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR, VK_ACCESS_2_SHADER_WRITE_BIT_KHR,
3846 																								  VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_READ_BIT_KHR,
3847 																								  VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
3848 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
3849 	const VkImageMemoryBarrier2KHR			postCopyImageImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,	VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
3850 																								  VK_PIPELINE_STAGE_2_HOST_BIT_KHR, VK_ACCESS_2_HOST_READ_BIT_KHR,
3851 																								  VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
3852 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
3853 	const VkDependencyInfoKHR				postTraceRaysDependency		= u::makeDependency(postTraceRaysImageBarrier);
3854 	const VkDependencyInfoKHR				postCopyImageDependency		= u::makeDependency(postCopyImageImageBarrier);
3855 
3856 	const Move<VkCommandPool>				cmdPool						= createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
3857 	const Move<VkCommandBuffer>				cmdBuffer					= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3858 
3859 	auto									tlas						= makeTopLevelAccelerationStructure();
3860 	tlas->setBuildType(m_params->build);
3861 	tlas->setInstanceCount(1);
3862 	tlas->addInstance(blas, identityMatrix3x4, 0, (~0u), 0, VkGeometryInstanceFlagsKHR(0));
3863 	beginCommandBuffer(vk, *cmdBuffer);
3864 		tlas->createAndBuild(vk, device, *cmdBuffer, allocator);
3865 	endCommandBuffer(vk, *cmdBuffer);
3866 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3867 
3868 	const VkDescriptorImageInfo				descriptorImageInfo			= makeDescriptorImageInfo(VkSampler(), *view, VK_IMAGE_LAYOUT_GENERAL);
3869 	const VkWriteDescriptorSetAccelerationStructureKHR writeDescriptorTlas
3870 	{
3871 		VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	//  VkStructureType						sType;
3872 		nullptr,															//  const void*							pNext;
3873 		1,																	//  deUint32							accelerationStructureCount;
3874 		tlas->getPtr()														//  const VkAccelerationStructureKHR*	pAccelerationStructures;
3875 	};
3876 
3877 	DescriptorSetUpdateBuilder()
3878 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
3879 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &writeDescriptorTlas)
3880 		.update(vk, device);
3881 
3882 	beginCommandBuffer(vk, *cmdBuffer);
3883 		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
3884 		vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, nullptr);
3885 		vk.cmdPipelineBarrier2(*cmdBuffer, &preClearImageDependency);
3886 		vk.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1, &imageSubresourceRange);
3887 		vk.cmdPipelineBarrier2(*cmdBuffer, &postClearImageDependency);
3888 		cmdTraceRays(vk,
3889 			*cmdBuffer,
3890 			&rgenRegion,	// rgen
3891 			&missRegion,	// miss
3892 			&chitRegion,	// hit
3893 			&callRegion,	// call
3894 			m_params->width, m_params->height, 1);
3895 		vk.cmdPipelineBarrier2(*cmdBuffer, &postTraceRaysDependency);
3896 		vk.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, **buffer, 1u, &bufferCopyImageRegion);
3897 		vk.cmdPipelineBarrier2(*cmdBuffer, &postCopyImageDependency);
3898 	endCommandBuffer(vk, *cmdBuffer);
3899 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3900 
3901 	invalidateMappedMemoryRange(vk, device, buffer->getAllocation().getMemory(), buffer->getAllocation().getOffset(), bufferSize);
3902 
3903 	return buffer;
3904 }
3905 
3906 TestStatus CopyBlasInstance::iterate (void)
3907 {
3908 	const deUint32							queueFamilyIndex			= m_context.getUniversalQueueFamilyIndex();
3909 	const VkQueue							queue						= m_context.getUniversalQueue();
3910 
3911 	const de::MovePtr<RayTracingProperties>	rtProps						= makeRayTracingProperties(m_context.getInstanceInterface(), m_context.getPhysicalDevice());
3912 	const deUint32							shaderGroupHandleSize		= rtProps->getShaderGroupHandleSize();
3913 	const deUint32							shaderGroupBaseAlignment	= rtProps->getShaderGroupBaseAlignment();
3914 
3915 	const VkImageCreateInfo					imageCreateInfo				= makeImageCreateInfo(m_params->width, m_params->height, m_format);
3916 	const VkImageSubresourceRange			imageSubresourceRange		= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
3917 	const de::MovePtr<ImageWithMemory>		image						= makeMovePtr<ImageWithMemory>(vk, device, allocator, imageCreateInfo, MemoryRequirement::Any);
3918 	const Move<VkImageView>					view						= makeImageView(vk, device, **image, VK_IMAGE_VIEW_TYPE_2D, m_format, imageSubresourceRange);
3919 
3920 	const deUint32							bufferSize					= (m_params->width * m_params->height * mapVkFormat(m_format).getPixelSize());
3921 	const VkBufferCreateInfo				bufferCreateInfo			= makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3922 	de::MovePtr<BufferWithMemory>			resultImageBuffer			= makeMovePtr<BufferWithMemory>(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible);
3923 
3924 	const VkImageSubresourceLayers			imageSubresourceLayers		= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
3925 	const VkBufferImageCopy					bufferCopyImageRegion		= makeBufferImageCopy(makeExtent3D(m_params->width, m_params->height, 1u), imageSubresourceLayers);
3926 
3927 	de::MovePtr<RayTracingPipeline>			rtPipeline					= makeMovePtr<RayTracingPipeline>();
3928 	rtPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,		*rgenShader, 0);
3929 	rtPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,	*chitShader, 1);
3930 	rtPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR,			*missShader, 2);
3931 
3932 	const Move<VkDescriptorSetLayout>		descriptorSetLayout			= DescriptorSetLayoutBuilder()
3933 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
3934 		.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
3935 		.build(vk, device, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
3936 
3937 	const Move<VkPipelineLayout>			pipelineLayout				= makePipelineLayout(vk, device, *descriptorSetLayout);
3938 	Move<VkPipeline>						pipeline					= rtPipeline->createPipeline(vk, device, *pipelineLayout);
3939 
3940 	de::MovePtr<BufferWithMemory>			rgenSbt						= rtPipeline->createShaderBindingTable(vk, device, *pipeline, allocator,
3941 																											   shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1);
3942 	VkStridedDeviceAddressRegionKHR			rgenRegion					= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vk, device, **rgenSbt, 0),
3943 																											shaderGroupHandleSize, shaderGroupHandleSize);
3944 	de::MovePtr<BufferWithMemory>			chitSbt						= rtPipeline->createShaderBindingTable(vk, device, *pipeline, allocator,
3945 																											   shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1);
3946 	VkStridedDeviceAddressRegionKHR			chitRegion					= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vk, device, **chitSbt, 0),
3947 																											shaderGroupHandleSize, shaderGroupHandleSize);
3948 	de::MovePtr<BufferWithMemory>			missSbt						= rtPipeline->createShaderBindingTable(vk, device, *pipeline, allocator,
3949 																											   shaderGroupHandleSize, shaderGroupBaseAlignment, 2, 1);
3950 	VkStridedDeviceAddressRegionKHR			missRegion					= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vk, device, **missSbt, 0),
3951 																											shaderGroupHandleSize, shaderGroupHandleSize);
3952 	const VkStridedDeviceAddressRegionKHR	callRegion					= makeStridedDeviceAddressRegionKHR(VkDeviceAddress(0), 0, 0);
3953 
3954 	const VkClearValue						clearValue					= { { { 0.1f, 0.2f, 0.3f, 0.4f } } };
3955 
3956 	const VkImageMemoryBarrier2KHR			preClearImageImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR, 0,
3957 																								  VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
3958 																								  VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
3959 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
3960 	const VkImageMemoryBarrier2KHR			postClearImageImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
3961 																								  VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR, VK_ACCESS_2_SHADER_READ_BIT_KHR,
3962 																								  VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL,
3963 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
3964 	const VkDependencyInfoKHR				preClearImageDependency		= u::makeDependency(preClearImageImageBarrier);
3965 	const VkDependencyInfoKHR				postClearImageDependency	= u::makeDependency(postClearImageImageBarrier);
3966 
3967 
3968 	const VkImageMemoryBarrier2KHR			postTraceRaysImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR, VK_ACCESS_2_SHADER_WRITE_BIT_KHR,
3969 																								  VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_READ_BIT_KHR,
3970 																								  VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
3971 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
3972 	const VkImageMemoryBarrier2KHR			postCopyImageImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,	VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
3973 																								  VK_PIPELINE_STAGE_2_HOST_BIT_KHR, VK_ACCESS_2_HOST_READ_BIT_KHR,
3974 																								  VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
3975 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
3976 	const VkDependencyInfoKHR				postTraceRaysDependency		= u::makeDependency(postTraceRaysImageBarrier);
3977 	const VkDependencyInfoKHR				postCopyImageDependency		= u::makeDependency(postCopyImageImageBarrier);
3978 	const VkPipelineStageFlags2KHR			srcStageMask				= m_params->type == CopyWithinPipelineParams::Type::StageASCopyBit
3979 																			? VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR
3980 																			: VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT_KHR;
3981 	const VkMemoryBarrier2KHR				copyBlasMemoryBarrier		= makeMemoryBarrier2(srcStageMask, VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
3982 																							 VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
3983 																							 VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR);
3984 	const VkDependencyInfoKHR				copyBlasDependency			= u::makeDependency(copyBlasMemoryBarrier);
3985 
3986 
3987 	const Move<VkCommandPool>				cmdPool						= createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
3988 	const Move<VkCommandBuffer>				cmdBuffer					= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3989 
3990 	std::vector<VkDeviceSize>				blasSize					(1);
3991 	BlasPtr									blas1						(makeBottomLevelAccelerationStructure().release());
3992 
3993 	// After this block blas1 resides on the device or on the host, depending on its build type.
3994 	// Once built, it is queried for its serialization size, which is then used to create an
3995 	// empty blas2. This size may be larger than strictly necessary, but it must never be smaller.
3997 	{
3998 		const VkQueryType query = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR;
3999 		Move<VkQueryPool> queryPoolSize = makeQueryPool(vk, device, query, 1);
4000 		beginCommandBuffer(vk, *cmdBuffer);
4001 			blas1->setBuildType(m_params->build);
4002 			blas1->setGeometryData(	{
4003 					{ 0.0, 0.0, 0.0 },
4004 					{ 1.0, 0.0, 0.0 },
4005 					{ 0.0, 1.0, 0.0 }}, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
4006 			blas1->createAndBuild(vk, device, *cmdBuffer, allocator);
4007 			queryAccelerationStructureSize(vk, device, *cmdBuffer, { *blas1->getPtr() }, m_params->build, *queryPoolSize, query, 0u, blasSize);
4008 		endCommandBuffer(vk, *cmdBuffer);
4009 		submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4010 		if (m_params->build == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
4011 		{
4012 			VK_CHECK(vk.getQueryPoolResults(device, *queryPoolSize, 0u, 1, sizeof(VkDeviceSize), blasSize.data(),
4013 											sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
4014 		}
4015 	}
4016 
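	// Render the reference image by tracing against a TLAS that references blas1 directly.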
4017 	de::MovePtr<BufferWithMemory>			referenceImageBuffer	= getRefImage(blas1);
4018 
4019 	// Create blas2 as an empty acceleration structure; it will be filled by copying blas1 into it.
4020 	BlasPtr									blas2					(makeBottomLevelAccelerationStructure().release());
4021 	blas2->create(vk, device, allocator, blasSize[0]);
4022 
4023 	auto									tlas					= makeTopLevelAccelerationStructure();
4024 	tlas->setBuildType(m_params->build);
4025 	tlas->setInstanceCount(1);
4026 	tlas->addInstance(blas2, identityMatrix3x4, 0, (~0u), 0, VkGeometryInstanceFlagsKHR(0));
4027 
4028 	const VkCopyAccelerationStructureInfoKHR copyBlasInfo
4029 	{
4030 		VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR,		// VkStructureType						sType;
4031 		nullptr,													// const void*							pNext;
4032 		*blas1->getPtr(),											// VkAccelerationStructureKHR			src;
4033 		*blas2->getPtr(),											// VkAccelerationStructureKHR			dst;
4034 		VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR				// VkCopyAccelerationStructureModeKHR	mode;
4035 	};
4036 
4037 	beginCommandBuffer(vk, *cmdBuffer);
4038 		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
4039 
4040 		if (m_params->build == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
4041 		{
4042 			vk.cmdCopyAccelerationStructureKHR(*cmdBuffer, &copyBlasInfo);
4043 			vk.cmdPipelineBarrier2(*cmdBuffer, &copyBlasDependency);
4044 		}
4045 		else VK_CHECK(vk.copyAccelerationStructureKHR(device, VkDeferredOperationKHR(0), &copyBlasInfo));
4046 
4047 		tlas->createAndBuild(vk, device, *cmdBuffer, allocator);
4048 
4049 		const VkDescriptorImageInfo				descriptorImageInfo			= makeDescriptorImageInfo(VkSampler(), *view, VK_IMAGE_LAYOUT_GENERAL);
4050 		const VkWriteDescriptorSetAccelerationStructureKHR writeDescriptorTlas
4051 		{
4052 			VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	//  VkStructureType						sType;
4053 			nullptr,															//  const void*							pNext;
4054 			1,																	//  deUint32							accelerationStructureCount;
4055 			tlas->getPtr()														//  const VkAccelerationStructureKHR*	pAccelerationStructures;
4056 		};
4057 
4058 		DescriptorSetUpdateBuilder()
4059 			.writeSingle(VkDescriptorSet(), DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
4060 			.writeSingle(VkDescriptorSet(), DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &writeDescriptorTlas)
4061 			.updateWithPush(vk, *cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 0, 2);
4062 
4063 		vk.cmdPipelineBarrier2(*cmdBuffer, &preClearImageDependency);
4064 		vk.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1, &imageSubresourceRange);
4065 		vk.cmdPipelineBarrier2(*cmdBuffer, &postClearImageDependency);
4066 
4067 		cmdTraceRays(vk,
4068 			*cmdBuffer,
4069 			&rgenRegion,	// rgen
4070 			&missRegion,	// miss
4071 			&chitRegion,	// hit
4072 			&callRegion,	// call
4073 			m_params->width, m_params->height, 1);
4074 
4075 		vk.cmdPipelineBarrier2(*cmdBuffer, &postTraceRaysDependency);
4076 		vk.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, **resultImageBuffer, 1u, &bufferCopyImageRegion);
4077 		vk.cmdPipelineBarrier2(*cmdBuffer, &postCopyImageDependency);
4078 
4079 	endCommandBuffer(vk, *cmdBuffer);
4080 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4081 
4082 	invalidateMappedMemoryRange(vk, device, resultImageBuffer->getAllocation().getMemory(), resultImageBuffer->getAllocation().getOffset(), bufferSize);
4083 
4084 	const void*	referenceImageData	= referenceImageBuffer->getAllocation().getHostPtr();
4085 	const void*	resultImageData		= resultImageBuffer->getAllocation().getHostPtr();
4086 
4087 	return (deMemCmp(referenceImageData, resultImageData, bufferSize) == 0) ? TestStatus::pass("") : TestStatus::fail("Reference and result images differ");
4088 }
4089 
4090 VkDeviceSize CopySBTInstance::getBufferSizeForSBT (const deUint32& groupCount, const deUint32& shaderGroupHandleSize, const deUint32& shaderGroupBaseAlignment) const
4091 {
4092 	DE_UNREF(shaderGroupBaseAlignment);
4093 	return (groupCount * deAlign32(shaderGroupHandleSize, shaderGroupHandleSize));
4094 }
4095 
4096 de::MovePtr<BufferWithMemory> CopySBTInstance::getBufferForSBT (const deUint32& groupCount, const deUint32& shaderGroupHandleSize, const deUint32& shaderGroupBaseAlignment) const
4097 {
4098 	const VkDeviceSize			sbtSize				= getBufferSizeForSBT(groupCount, shaderGroupHandleSize, shaderGroupBaseAlignment);
4099 	const VkBufferUsageFlags	sbtFlags			= VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
4100 	const VkBufferCreateInfo	sbtCreateInfo		= makeBufferCreateInfo(sbtSize, sbtFlags);
4101 	const MemoryRequirement		sbtMemRequirements	= MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress;
4102 
4103 	return makeMovePtr<BufferWithMemory>(vk, device, allocator, sbtCreateInfo, sbtMemRequirements);
4104 }
4105 
4106 TestStatus CopySBTInstance::iterate (void)
4107 {
4108 	const deUint32							queueFamilyIndex			= m_context.getUniversalQueueFamilyIndex();
4109 	const VkQueue							queue						= m_context.getUniversalQueue();
4110 
4111 	const de::MovePtr<RayTracingProperties>	rtProps						= makeRayTracingProperties(m_context.getInstanceInterface(), m_context.getPhysicalDevice());
4112 	const deUint32							shaderGroupHandleSize		= rtProps->getShaderGroupHandleSize();
4113 	const deUint32							shaderGroupBaseAlignment	= rtProps->getShaderGroupBaseAlignment();
4114 
4115 	const VkImageCreateInfo					imageCreateInfo				= makeImageCreateInfo(m_params->width, m_params->height, m_format);
4116 	const VkImageSubresourceRange			imageSubresourceRange		= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
4117 	const de::MovePtr<ImageWithMemory>		image						= makeMovePtr<ImageWithMemory>(vk, device, allocator, imageCreateInfo, MemoryRequirement::Any);
4118 	const Move<VkImageView>					view						= makeImageView(vk, device, **image, VK_IMAGE_VIEW_TYPE_2D, m_format, imageSubresourceRange);
4119 
4120 	const deUint32							bufferSize					= (m_params->width * m_params->height * mapVkFormat(m_format).getPixelSize());
4121 	const VkBufferCreateInfo				bufferCreateInfo			= makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
4122 	de::MovePtr<BufferWithMemory>			referenceImageBuffer		= makeMovePtr<BufferWithMemory>(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible);
4123 	de::MovePtr<BufferWithMemory>			resultImageBuffer			= makeMovePtr<BufferWithMemory>(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible);
4124 
4125 	const VkImageSubresourceLayers			imageSubresourceLayers		= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
4126 	const VkBufferImageCopy					bufferCopyImageRegion		= makeBufferImageCopy(makeExtent3D(m_params->width, m_params->height, 1u), imageSubresourceLayers);
4127 
4128 	de::MovePtr<RayTracingPipeline>			rtPipeline					= makeMovePtr<RayTracingPipeline>();
4129 	rtPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,		*rgenShader, 0);
4130 	rtPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,	*chitShader, 1);
4131 	rtPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR,			*missShader, 2);
4132 
4133 	const Move<VkDescriptorPool>			descriptorPool				= DescriptorPoolBuilder()
4134 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
4135 		.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
4136 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
4137 	const Move<VkDescriptorSetLayout>		descriptorSetLayout			= DescriptorSetLayoutBuilder()
4138 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
4139 		.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
4140 		.build(vk, device);
4141 	const Move<VkDescriptorSet>				descriptorSet				= makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
4142 
4143 	const Move<VkPipelineLayout>			pipelineLayout				= makePipelineLayout(vk, device, *descriptorSetLayout);
4144 	Move<VkPipeline>						pipeline					= rtPipeline->createPipeline(vk, device, *pipelineLayout);
4145 
4146 	de::MovePtr<BufferWithMemory>			sourceRgenSbt				= rtPipeline->createShaderBindingTable(vk, device, *pipeline, allocator,
4147 																											   shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1,
4148 																											   VkBufferCreateFlags(0), VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
4149 	VkStridedDeviceAddressRegionKHR			sourceRgenRegion			= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vk, device, **sourceRgenSbt, 0),
4150 																											shaderGroupHandleSize, shaderGroupHandleSize);
4151 	de::MovePtr<BufferWithMemory>			copyRgenSbt					= getBufferForSBT(1, shaderGroupHandleSize, shaderGroupBaseAlignment);
4152 	VkStridedDeviceAddressRegionKHR			copyRgenRegion				= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vk, device, **copyRgenSbt, 0),
4153 																											shaderGroupHandleSize, shaderGroupHandleSize);
4154 	de::MovePtr<BufferWithMemory>			chitSbt						= rtPipeline->createShaderBindingTable(vk, device, *pipeline, allocator,
4155 																											   shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1);
4156 	VkStridedDeviceAddressRegionKHR			chitRegion					= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vk, device, **chitSbt, 0),
4157 																											shaderGroupHandleSize, shaderGroupHandleSize);
4158 	de::MovePtr<BufferWithMemory>			missSbt						= rtPipeline->createShaderBindingTable(vk, device, *pipeline, allocator,
4159 																											   shaderGroupHandleSize, shaderGroupBaseAlignment, 2, 1);
4160 	VkStridedDeviceAddressRegionKHR			missRegion					= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vk, device, **missSbt, 0),
4161 																											shaderGroupHandleSize, shaderGroupHandleSize);
4162 	const VkStridedDeviceAddressRegionKHR	callRegion					= makeStridedDeviceAddressRegionKHR(VkDeviceAddress(0), 0, 0);
4163 
4164 	const VkClearValue						clearValue					= { { { 0.1f, 0.2f, 0.3f, 0.4f } } };
4165 
4166 	const VkImageMemoryBarrier2KHR			preClearImageImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR, 0,
4167 																								  VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
4168 																								  VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
4169 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4170 	const VkImageMemoryBarrier2KHR			postClearImageImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
4171 																								  VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR, VK_ACCESS_2_SHADER_READ_BIT_KHR,
4172 																								  VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL,
4173 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4174 	const VkDependencyInfoKHR				preClearImageDependency		= u::makeDependency(preClearImageImageBarrier);
4175 	const VkDependencyInfoKHR				postClearImageDependency	= u::makeDependency(postClearImageImageBarrier);
4176 
4177 
4178 	const VkImageMemoryBarrier2KHR			postTraceRaysImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR, VK_ACCESS_2_SHADER_WRITE_BIT_KHR,
4179 																								  VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_READ_BIT_KHR,
4180 																								  VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
4181 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4182 	const VkImageMemoryBarrier2KHR			postCopyImageImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,	VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
4183 																								  VK_PIPELINE_STAGE_2_HOST_BIT_KHR, VK_ACCESS_2_HOST_READ_BIT_KHR,
4184 																								  VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
4185 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4186 	const VkDependencyInfoKHR				postTraceRaysDependency		= u::makeDependency(postTraceRaysImageBarrier);
4187 	const VkDependencyInfoKHR				postCopyImageDependency		= u::makeDependency(postCopyImageImageBarrier);
4188 
4189 	const Move<VkCommandPool>				cmdPool						= createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
4190 	const Move<VkCommandBuffer>				cmdBuffer					= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4191 
4192 	auto									tlas						= makeTopLevelAccelerationStructure();
4193 	BlasPtr									blas						(makeBottomLevelAccelerationStructure().release());
4194 	blas->setBuildType(m_params->build);
4195 	blas->setGeometryData(	{
4196 			{ 0.0, 0.0, 0.0 },
4197 			{ 1.0, 0.0, 0.0 },
4198 			{ 0.0, 1.0, 0.0 }}, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
4199 	tlas->setBuildType(m_params->build);
4200 	tlas->setInstanceCount(1);
4201 	tlas->addInstance(blas, identityMatrix3x4, 0, (~0u), 0, VkGeometryInstanceFlagsKHR(0));
4202 	beginCommandBuffer(vk, *cmdBuffer);
4203 		blas->createAndBuild(vk, device, *cmdBuffer, allocator);
4204 		tlas->createAndBuild(vk, device, *cmdBuffer, allocator);
4205 	endCommandBuffer(vk, *cmdBuffer);
4206 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4207 
4208 	const VkDescriptorImageInfo				descriptorImageInfo			= makeDescriptorImageInfo(VkSampler(), *view, VK_IMAGE_LAYOUT_GENERAL);
4209 	const VkWriteDescriptorSetAccelerationStructureKHR writeDescriptorTlas
4210 	{
4211 		VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	//  VkStructureType						sType;
4212 		nullptr,															//  const void*							pNext;
4213 		1,																	//  deUint32							accelerationStructureCount;
4214 		tlas->getPtr()														//  const VkAccelerationStructureKHR*	pAccelerationStructures;
4215 	};
4216 
4217 	DescriptorSetUpdateBuilder()
4218 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
4219 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &writeDescriptorTlas)
4220 		.update(vk, device);
4221 
4222 	beginCommandBuffer(vk, *cmdBuffer);
4223 		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
4224 		vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, nullptr);
4225 		vk.cmdPipelineBarrier2(*cmdBuffer, &preClearImageDependency);
4226 		vk.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1, &imageSubresourceRange);
4227 		vk.cmdPipelineBarrier2(*cmdBuffer, &postClearImageDependency);
4228 		cmdTraceRays(vk,
4229 			*cmdBuffer,
4230 			&sourceRgenRegion,	// rgen
4231 			&missRegion,		// miss
4232 			&chitRegion,		// hit
4233 			&callRegion,		// call
4234 			m_params->width, m_params->height, 1);
4235 		vk.cmdPipelineBarrier2(*cmdBuffer, &postTraceRaysDependency);
4236 		vk.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, **referenceImageBuffer, 1u, &bufferCopyImageRegion);
4237 		vk.cmdPipelineBarrier2(*cmdBuffer, &postCopyImageDependency);
4238 	endCommandBuffer(vk, *cmdBuffer);
4239 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4240 
4241 
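	// Second pass: the raygen SBT is copied with vkCmdCopyBuffer into copyRgenSbt and the rays are
	// traced again through the copied table; the memory barrier below exercises the new
	// VK_ACCESS_2_SHADER_BINDING_TABLE_READ_BIT_KHR access flag before the trace.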
4242 	const VkBufferCopy bufferCopy
4243 	{
4244 		0,	// VkDeviceSize srcOffset;
4245 		0,																			// VkDeviceSize	dstOffset;
4246 		getBufferSizeForSBT(1, shaderGroupHandleSize, shaderGroupBaseAlignment)	// VkDeviceSize	size;
4247 	};
4248 	const VkMemoryBarrier2KHR				postCopySBTMemoryBarrier	= makeMemoryBarrier2(VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR,
4249 																							 VkAccessFlags2KHR(0),
4250 																							 VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR,
4251 																							 VK_ACCESS_2_SHADER_BINDING_TABLE_READ_BIT_KHR);
4252 	const VkDependencyInfoKHR				postClearImgCopySBTDependency	= u::makeDependency(postCopySBTMemoryBarrier, postClearImageImageBarrier);
4253 
4254 	beginCommandBuffer(vk, *cmdBuffer);
4255 		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
4256 		vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, nullptr);
4257 		vk.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1, &imageSubresourceRange);
4258 		vk.cmdCopyBuffer(*cmdBuffer, **sourceRgenSbt, **copyRgenSbt, 1, &bufferCopy);
4259 		vk.cmdPipelineBarrier2(*cmdBuffer, &postClearImgCopySBTDependency);
4260 		cmdTraceRays(vk,
4261 			*cmdBuffer,
4262 			&copyRgenRegion,	// rgen
4263 			&missRegion,		// miss
4264 			&chitRegion,		// hit
4265 			&callRegion,		// call
4266 			m_params->width, m_params->height, 1);
4267 		vk.cmdPipelineBarrier2(*cmdBuffer, &postTraceRaysDependency);
4268 		vk.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, **resultImageBuffer, 1u, &bufferCopyImageRegion);
4269 		vk.cmdPipelineBarrier2(*cmdBuffer, &postCopyImageDependency);
4270 	endCommandBuffer(vk, *cmdBuffer);
4271 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4272 
4273 	invalidateMappedMemoryRange(vk, device, referenceImageBuffer->getAllocation().getMemory(), referenceImageBuffer->getAllocation().getOffset(), bufferSize);
4274 	invalidateMappedMemoryRange(vk, device, resultImageBuffer->getAllocation().getMemory(), resultImageBuffer->getAllocation().getOffset(), bufferSize);
4275 
4276 	const void* referenceImageDataPtr	= referenceImageBuffer->getAllocation().getHostPtr();
4277 	const void* resultImageDataPtr		= resultImageBuffer->getAllocation().getHostPtr();
4278 
4279 	return (deMemCmp(referenceImageDataPtr, resultImageDataPtr, bufferSize) == 0) ? TestStatus::pass("") : TestStatus::fail("");
4280 }
4281 
4282 }	// anonymous
4283 
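// Builds the basic acceleration-structure building test hierarchy: build type (cpu/gpu) >
// bottom-level geometry kind > top-level instance kind > vertex padding, with one test per
// combination of the optimization/update/compaction/low-memory build flags and the generic-create
// variants.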
4284 void addBasicBuildingTests(tcu::TestCaseGroup* group)
4285 {
4286 	struct
4287 	{
4288 		vk::VkAccelerationStructureBuildTypeKHR	buildType;
4289 		const char*								name;
4290 	} buildTypes[] =
4291 	{
4292 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,				"cpu_built"	},
4293 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,				"gpu_built"	},
4294 	};
4295 
4296 	struct
4297 	{
4298 		BottomTestType							testType;
4299 		bool									usesAOP;
4300 		const char*								name;
4301 	} bottomTestTypes[] =
4302 	{
4303 		{ BTT_TRIANGLES,	false,										"triangles" },
4304 		{ BTT_TRIANGLES,	true,										"triangles_aop" },
4305 		{ BTT_AABBS,		false,										"aabbs" },
4306 		{ BTT_AABBS,		true,										"aabbs_aop" },
4307 	};
4308 
4309 	struct
4310 	{
4311 		TopTestType								testType;
4312 		bool									usesAOP;
4313 		const char*								name;
4314 	} topTestTypes[] =
4315 	{
4316 		{ TTT_IDENTICAL_INSTANCES,	false,								"identical_instances" },
4317 		{ TTT_IDENTICAL_INSTANCES,	true,								"identical_instances_aop" },
4318 		{ TTT_DIFFERENT_INSTANCES,	false,								"different_instances" },
4319 		{ TTT_DIFFERENT_INSTANCES,	true,								"different_instances_aop" },
4320 	};
4321 
4322 	struct BuildFlagsData
4323 	{
4324 		VkBuildAccelerationStructureFlagsKHR	flags;
4325 		const char*								name;
4326 	};
4327 
4328 	BuildFlagsData optimizationTypes[] =
4329 	{
4330 		{ VkBuildAccelerationStructureFlagsKHR(0u),						"0" },
4331 		{ VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR,	"fasttrace" },
4332 		{ VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR,	"fastbuild" },
4333 	};
4334 
4335 	BuildFlagsData updateTypes[] =
4336 	{
4337 		{ VkBuildAccelerationStructureFlagsKHR(0u),						"0" },
4338 		{ VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR,			"update" },
4339 	};
4340 
4341 	BuildFlagsData compactionTypes[] =
4342 	{
4343 		{ VkBuildAccelerationStructureFlagsKHR(0u),						"0" },
4344 		{ VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR,		"compaction" },
4345 	};
4346 
4347 	BuildFlagsData lowMemoryTypes[] =
4348 	{
4349 		{ VkBuildAccelerationStructureFlagsKHR(0u),						"0" },
4350 		{ VK_BUILD_ACCELERATION_STRUCTURE_LOW_MEMORY_BIT_KHR,			"lowmemory" },
4351 	};
4352 
4353 	struct
4354 	{
4355 		bool		padVertices;
4356 		const char*	name;
4357 	} paddingType[] =
4358 	{
4359 		{ false,	"nopadding"	},
4360 		{ true,		"padded"	},
4361 	};
4362 
4363 	struct
4364 	{
4365 		bool		topGeneric;
4366 		bool		bottomGeneric;
4367 		const char*	suffix;
4368 	} createGenericParams[] =
4369 	{
4370 		{	false,	false,	""					},
4371 		{	false,	true,	"_bottomgeneric"	},
4372 		{	true,	false,	"_topgeneric"		},
4373 		{	true,	true,	"_bothgeneric"		},
4374 	};
4375 
4376 	for (size_t buildTypeNdx = 0; buildTypeNdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeNdx)
4377 	{
4378 		de::MovePtr<tcu::TestCaseGroup> buildGroup(new tcu::TestCaseGroup(group->getTestContext(), buildTypes[buildTypeNdx].name, ""));
4379 
4380 		for (size_t bottomNdx = 0; bottomNdx < DE_LENGTH_OF_ARRAY(bottomTestTypes); ++bottomNdx)
4381 		{
4382 			de::MovePtr<tcu::TestCaseGroup> bottomGroup(new tcu::TestCaseGroup(group->getTestContext(), bottomTestTypes[bottomNdx].name, ""));
4383 
4384 			for (size_t topNdx = 0; topNdx < DE_LENGTH_OF_ARRAY(topTestTypes); ++topNdx)
4385 			{
4386 				de::MovePtr<tcu::TestCaseGroup> topGroup(new tcu::TestCaseGroup(group->getTestContext(), topTestTypes[topNdx].name, ""));
4387 
4388 				for (int paddingTypeIdx = 0; paddingTypeIdx < DE_LENGTH_OF_ARRAY(paddingType); ++paddingTypeIdx)
4389 				{
4390 					de::MovePtr<tcu::TestCaseGroup> paddingGroup(new tcu::TestCaseGroup(group->getTestContext(), paddingType[paddingTypeIdx].name, ""));
4391 
4392 					for (size_t optimizationNdx = 0; optimizationNdx < DE_LENGTH_OF_ARRAY(optimizationTypes); ++optimizationNdx)
4393 					{
4394 						for (size_t updateNdx = 0; updateNdx < DE_LENGTH_OF_ARRAY(updateTypes); ++updateNdx)
4395 						{
4396 							for (size_t compactionNdx = 0; compactionNdx < DE_LENGTH_OF_ARRAY(compactionTypes); ++compactionNdx)
4397 							{
4398 								for (size_t lowMemoryNdx = 0; lowMemoryNdx < DE_LENGTH_OF_ARRAY(lowMemoryTypes); ++lowMemoryNdx)
4399 								{
4400 									for (int createGenericIdx = 0; createGenericIdx < DE_LENGTH_OF_ARRAY(createGenericParams); ++createGenericIdx)
4401 									{
4402 										std::string testName =
4403 											std::string(optimizationTypes[optimizationNdx].name) + "_" +
4404 											std::string(updateTypes[updateNdx].name) + "_" +
4405 											std::string(compactionTypes[compactionNdx].name) + "_" +
4406 											std::string(lowMemoryTypes[lowMemoryNdx].name) +
4407 											std::string(createGenericParams[createGenericIdx].suffix);
4408 
4409 										TestParams testParams
4410 										{
4411 											buildTypes[buildTypeNdx].buildType,
4412 											VK_FORMAT_R32G32B32_SFLOAT,
4413 											paddingType[paddingTypeIdx].padVertices,
4414 											VK_INDEX_TYPE_NONE_KHR,
4415 											bottomTestTypes[bottomNdx].testType,
4416 											InstanceCullFlags::NONE,
4417 											bottomTestTypes[bottomNdx].usesAOP,
4418 											createGenericParams[createGenericIdx].bottomGeneric,
4419 											topTestTypes[topNdx].testType,
4420 											topTestTypes[topNdx].usesAOP,
4421 											createGenericParams[createGenericIdx].topGeneric,
4422 											optimizationTypes[optimizationNdx].flags | updateTypes[updateNdx].flags | compactionTypes[compactionNdx].flags | lowMemoryTypes[lowMemoryNdx].flags,
4423 											OT_NONE,
4424 											OP_NONE,
4425 											RTAS_DEFAULT_SIZE,
4426 											RTAS_DEFAULT_SIZE,
4427 											de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()),
4428 											0u,
4429 											EmptyAccelerationStructureCase::NOT_EMPTY,
4430 											InstanceCustomIndexCase::NONE,
4431 											false,
4432 											0xFFu,
4433 										};
4434 										paddingGroup->addChild(new RayTracingASBasicTestCase(group->getTestContext(), testName.c_str(), "", testParams));
4435 									}
4436 								}
4437 							}
4438 						}
4439 					}
4440 					topGroup->addChild(paddingGroup.release());
4441 				}
4442 				bottomGroup->addChild(topGroup.release());
4443 			}
4444 			buildGroup->addChild(bottomGroup.release());
4445 		}
4446 		group->addChild(buildGroup.release());
4447 	}
4448 }
4449 
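// Registers the "format" test group: one single-triangle test per build type, vertex format (mandatory and
// additional ones), vertex padding mode and index type.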
4450 void addVertexIndexFormatsTests(tcu::TestCaseGroup* group)
4451 {
4452 	struct
4453 	{
4454 		vk::VkAccelerationStructureBuildTypeKHR				buildType;
4455 		const char*											name;
4456 	} buildTypes[] =
4457 	{
4458 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
4459 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
4460 	};
4461 
4462 	const VkFormat vertexFormats[] =
4463 	{
4464 		// Mandatory formats.
4465 		VK_FORMAT_R32G32_SFLOAT,
4466 		VK_FORMAT_R32G32B32_SFLOAT,
4467 		VK_FORMAT_R16G16_SFLOAT,
4468 		VK_FORMAT_R16G16B16A16_SFLOAT,
4469 		VK_FORMAT_R16G16_SNORM,
4470 		VK_FORMAT_R16G16B16A16_SNORM,
4471 
4472 		// Additional formats.
4473 		VK_FORMAT_R8G8_SNORM,
4474 		VK_FORMAT_R8G8B8_SNORM,
4475 		VK_FORMAT_R8G8B8A8_SNORM,
4476 		VK_FORMAT_R16G16B16_SNORM,
4477 		VK_FORMAT_R16G16B16_SFLOAT,
4478 		VK_FORMAT_R32G32B32A32_SFLOAT,
4479 		VK_FORMAT_R64G64_SFLOAT,
4480 		VK_FORMAT_R64G64B64_SFLOAT,
4481 		VK_FORMAT_R64G64B64A64_SFLOAT,
4482 	};
4483 
4484 	struct
4485 	{
4486 		VkIndexType								indexType;
4487 		const char*								name;
4488 	} indexFormats[] =
4489 	{
		{ VK_INDEX_TYPE_NONE_KHR,				"index_none"	},
		{ VK_INDEX_TYPE_UINT16,					"index_uint16"	},
		{ VK_INDEX_TYPE_UINT32,					"index_uint32"	},
4493 	};
4494 
4495 	struct
4496 	{
4497 		bool		padVertices;
4498 		const char*	name;
4499 	} paddingType[] =
4500 	{
4501 		{ false,	"nopadding"	},
4502 		{ true,		"padded"	},
4503 	};
4504 
4505 	for (size_t buildTypeNdx = 0; buildTypeNdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeNdx)
4506 	{
4507 		de::MovePtr<tcu::TestCaseGroup> buildGroup(new tcu::TestCaseGroup(group->getTestContext(), buildTypes[buildTypeNdx].name, ""));
4508 
4509 		for (size_t vertexFormatNdx = 0; vertexFormatNdx < DE_LENGTH_OF_ARRAY(vertexFormats); ++vertexFormatNdx)
4510 		{
4511 			const auto format		= vertexFormats[vertexFormatNdx];
4512 			const auto formatName	= getFormatSimpleName(format);
4513 
4514 			de::MovePtr<tcu::TestCaseGroup> vertexFormatGroup(new tcu::TestCaseGroup(group->getTestContext(), formatName.c_str(), ""));
4515 
4516 			for (int paddingIdx = 0; paddingIdx < DE_LENGTH_OF_ARRAY(paddingType); ++paddingIdx)
4517 			{
4518 				de::MovePtr<tcu::TestCaseGroup> paddingGroup(new tcu::TestCaseGroup(group->getTestContext(), paddingType[paddingIdx].name, ""));
4519 
4520 				for (size_t indexFormatNdx = 0; indexFormatNdx < DE_LENGTH_OF_ARRAY(indexFormats); ++indexFormatNdx)
4521 				{
4522 					TestParams testParams
4523 					{
4524 						buildTypes[buildTypeNdx].buildType,
4525 						format,
4526 						paddingType[paddingIdx].padVertices,
4527 						indexFormats[indexFormatNdx].indexType,
4528 						BTT_TRIANGLES,
4529 						InstanceCullFlags::NONE,
4530 						false,
4531 						false,
4532 						TTT_IDENTICAL_INSTANCES,
4533 						false,
4534 						false,
4535 						VkBuildAccelerationStructureFlagsKHR(0u),
4536 						OT_NONE,
4537 						OP_NONE,
4538 						RTAS_DEFAULT_SIZE,
4539 						RTAS_DEFAULT_SIZE,
4540 						de::SharedPtr<TestConfiguration>(new SingleTriangleConfiguration()),
4541 						0u,
4542 						EmptyAccelerationStructureCase::NOT_EMPTY,
4543 						InstanceCustomIndexCase::NONE,
4544 						false,
4545 						0xFFu,
4546 					};
4547 					paddingGroup->addChild(new RayTracingASBasicTestCase(group->getTestContext(), indexFormats[indexFormatNdx].name, "", testParams));
4548 				}
4549 				vertexFormatGroup->addChild(paddingGroup.release());
4550 			}
4551 			buildGroup->addChild(vertexFormatGroup.release());
4552 		}
4553 		group->addChild(buildGroup.release());
4554 	}
4555 }
4556 
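// Shared helper for the "operations" and "host_threading" groups: creates checkerboard tests that copy, compact or
// serialize either the top or the bottom acceleration structure. When workerThreads > 0 only host (CPU) builds and
// the copy/serialization operations are exercised, using the requested number of worker threads.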
4557 void addOperationTestsImpl (tcu::TestCaseGroup* group, const deUint32 workerThreads)
4558 {
4559 	struct
4560 	{
4561 		OperationType										operationType;
4562 		const char*											name;
4563 	} operationTypes[] =
4564 	{
4565 		{ OP_COPY,											"copy"			},
4566 		{ OP_COMPACT,										"compaction"	},
4567 		{ OP_SERIALIZE,										"serialization"	},
4568 	};
4569 
4570 	struct
4571 	{
4572 		vk::VkAccelerationStructureBuildTypeKHR				buildType;
4573 		const char*											name;
4574 	} buildTypes[] =
4575 	{
4576 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
4577 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
4578 	};
4579 
4580 	struct
4581 	{
4582 		OperationTarget										operationTarget;
4583 		const char*											name;
4584 	} operationTargets[] =
4585 	{
4586 		{ OT_TOP_ACCELERATION,								"top_acceleration_structure"		},
4587 		{ OT_BOTTOM_ACCELERATION,							"bottom_acceleration_structure"	},
4588 	};
4589 
4590 	struct
4591 	{
4592 		BottomTestType										testType;
4593 		const char*											name;
4594 	} bottomTestTypes[] =
4595 	{
4596 		{ BTT_TRIANGLES,									"triangles" },
4597 		{ BTT_AABBS,										"aabbs" },
4598 	};
4599 
4600 	for (size_t operationTypeNdx = 0; operationTypeNdx < DE_LENGTH_OF_ARRAY(operationTypes); ++operationTypeNdx)
4601 	{
4602 		if (workerThreads > 0)
4603 			if (operationTypes[operationTypeNdx].operationType != OP_COPY && operationTypes[operationTypeNdx].operationType != OP_SERIALIZE)
4604 				continue;
4605 
4606 		de::MovePtr<tcu::TestCaseGroup> operationTypeGroup(new tcu::TestCaseGroup(group->getTestContext(), operationTypes[operationTypeNdx].name, ""));
4607 
4608 		for (size_t buildTypeNdx = 0; buildTypeNdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeNdx)
4609 		{
4610 			if (workerThreads > 0 && buildTypes[buildTypeNdx].buildType != VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR)
4611 				continue;
4612 
4613 			de::MovePtr<tcu::TestCaseGroup> buildGroup(new tcu::TestCaseGroup(group->getTestContext(), buildTypes[buildTypeNdx].name, ""));
4614 
4615 			for (size_t operationTargetNdx = 0; operationTargetNdx < DE_LENGTH_OF_ARRAY(operationTargets); ++operationTargetNdx)
4616 			{
4617 				de::MovePtr<tcu::TestCaseGroup> operationTargetGroup(new tcu::TestCaseGroup(group->getTestContext(), operationTargets[operationTargetNdx].name, ""));
4618 
4619 				for (size_t testTypeNdx = 0; testTypeNdx < DE_LENGTH_OF_ARRAY(bottomTestTypes); ++testTypeNdx)
4620 				{
4621 					TopTestType topTest = (operationTargets[operationTargetNdx].operationTarget == OT_TOP_ACCELERATION) ? TTT_DIFFERENT_INSTANCES : TTT_IDENTICAL_INSTANCES;
4622 
4623 					TestParams testParams
4624 					{
4625 						buildTypes[buildTypeNdx].buildType,
4626 						VK_FORMAT_R32G32B32_SFLOAT,
4627 						false,
4628 						VK_INDEX_TYPE_NONE_KHR,
4629 						bottomTestTypes[testTypeNdx].testType,
4630 						InstanceCullFlags::NONE,
4631 						false,
4632 						false,
4633 						topTest,
4634 						false,
4635 						false,
4636 						VkBuildAccelerationStructureFlagsKHR(0u),
4637 						operationTargets[operationTargetNdx].operationTarget,
4638 						operationTypes[operationTypeNdx].operationType,
4639 						RTAS_DEFAULT_SIZE,
4640 						RTAS_DEFAULT_SIZE,
4641 						de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()),
4642 						workerThreads,
4643 						EmptyAccelerationStructureCase::NOT_EMPTY,
4644 						InstanceCustomIndexCase::NONE,
4645 						false,
4646 						0xFFu,
4647 					};
4648 					operationTargetGroup->addChild(new RayTracingASBasicTestCase(group->getTestContext(), bottomTestTypes[testTypeNdx].name, "", testParams));
4649 				}
4650 				buildGroup->addChild(operationTargetGroup.release());
4651 			}
4652 			operationTypeGroup->addChild(buildGroup.release());
4653 		}
4654 		group->addChild(operationTypeGroup.release());
4655 	}
4656 }
4657 
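// Single-threaded variant of the operation tests ("operations" group).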
4658 void addOperationTests (tcu::TestCaseGroup* group)
4659 {
4660 	addOperationTestsImpl(group, 0);
4661 }
4662 
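// Host-threading variant ("host_threading" group): repeats the operation tests with 1, 2, 3, 4 and 8 worker threads,
// plus a "max" group that passes a sentinel value of deUint32 max.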
4663 void addHostThreadingOperationTests (tcu::TestCaseGroup* group)
4664 {
4665 	const deUint32	threads[]	= { 1, 2, 3, 4, 8, std::numeric_limits<deUint32>::max() };
4666 
4667 	for (size_t threadsNdx = 0; threadsNdx < DE_LENGTH_OF_ARRAY(threads); ++threadsNdx)
4668 	{
4669 		const std::string groupName = threads[threadsNdx] != std::numeric_limits<deUint32>::max()
4670 									? de::toString(threads[threadsNdx])
4671 									: "max";
4672 
4673 		de::MovePtr<tcu::TestCaseGroup> threadGroup(new tcu::TestCaseGroup(group->getTestContext(), groupName.c_str(), ""));
4674 
4675 		addOperationTestsImpl(threadGroup.get(), threads[threadsNdx]);
4676 
4677 		group->addChild(threadGroup.release());
4678 	}
4679 }
4680 
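// Registers the "function_argument" tests: a single-triangle scene per build type, exercised through
// RayTracingASFuncArgTestCase, which passes the acceleration structure as a shader function argument.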
4681 void addFuncArgTests (tcu::TestCaseGroup* group)
4682 {
4683 	const struct
4684 	{
4685 		vk::VkAccelerationStructureBuildTypeKHR				buildType;
4686 		const char*											name;
4687 	} buildTypes[] =
4688 	{
4689 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
4690 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
4691 	};
4692 
4693 	auto& ctx = group->getTestContext();
4694 
4695 	for (int buildTypeNdx = 0; buildTypeNdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeNdx)
4696 	{
4697 		TestParams testParams
4698 		{
4699 			buildTypes[buildTypeNdx].buildType,
4700 			VK_FORMAT_R32G32B32_SFLOAT,
4701 			false,
4702 			VK_INDEX_TYPE_NONE_KHR,
4703 			BTT_TRIANGLES,
4704 			InstanceCullFlags::NONE,
4705 			false,
4706 			false,
4707 			TTT_IDENTICAL_INSTANCES,
4708 			false,
4709 			false,
4710 			VkBuildAccelerationStructureFlagsKHR(0u),
4711 			OT_NONE,
4712 			OP_NONE,
4713 			RTAS_DEFAULT_SIZE,
4714 			RTAS_DEFAULT_SIZE,
4715 			de::SharedPtr<TestConfiguration>(new SingleTriangleConfiguration()),
4716 			0u,
4717 			EmptyAccelerationStructureCase::NOT_EMPTY,
4718 			InstanceCustomIndexCase::NONE,
4719 			false,
4720 			0xFFu,
4721 		};
4722 
4723 		group->addChild(new RayTracingASFuncArgTestCase(ctx, buildTypes[buildTypeNdx].name, "", testParams));
4724 	}
4725 }
4726 
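// Registers the "instance_triangle_culling" tests: checkerboard scenes combining instance flags (face-culling
// disable and/or counterclockwise front face), index types and transformed vs. untransformed instances.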
4727 void addInstanceTriangleCullingTests (tcu::TestCaseGroup* group)
4728 {
4729 	const struct
4730 	{
4731 		InstanceCullFlags	cullFlags;
4732 		std::string			name;
4733 	} cullFlags[] =
4734 	{
4735 		{ InstanceCullFlags::NONE,				"noflags"		},
4736 		{ InstanceCullFlags::COUNTERCLOCKWISE,	"ccw"			},
4737 		{ InstanceCullFlags::CULL_DISABLE,		"nocull"		},
4738 		{ InstanceCullFlags::ALL,				"ccw_nocull"	},
4739 	};
4740 
4741 	const struct
4742 	{
4743 		TopTestType	topType;
4744 		std::string	name;
4745 	} topType[] =
4746 	{
4747 		{ TTT_DIFFERENT_INSTANCES, "transformed"	},	// Each instance has its own transformation matrix.
4748 		{ TTT_IDENTICAL_INSTANCES, "notransform"	},	// "Identical" instances, different geometries.
4749 	};
4750 
4751 	const struct
4752 	{
4753 		vk::VkAccelerationStructureBuildTypeKHR	buildType;
4754 		std::string								name;
4755 	} buildTypes[] =
4756 	{
4757 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
4758 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
4759 	};
4760 
4761 	const struct
4762 	{
4763 		VkIndexType	indexType;
4764 		std::string	name;
4765 	} indexFormats[] =
4766 	{
		{ VK_INDEX_TYPE_NONE_KHR,	"index_none"	},
		{ VK_INDEX_TYPE_UINT16,		"index_uint16"	},
		{ VK_INDEX_TYPE_UINT32,		"index_uint32"	},
4770 	};
4771 
4772 	auto& ctx = group->getTestContext();
4773 
4774 	for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
4775 	{
4776 		de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str(), ""));
4777 
4778 		for (int indexFormatIdx = 0; indexFormatIdx < DE_LENGTH_OF_ARRAY(indexFormats); ++indexFormatIdx)
4779 		{
4780 			de::MovePtr<tcu::TestCaseGroup> indexTypeGroup(new tcu::TestCaseGroup(ctx, indexFormats[indexFormatIdx].name.c_str(), ""));
4781 
4782 			for (int topTypeIdx = 0; topTypeIdx < DE_LENGTH_OF_ARRAY(topType); ++topTypeIdx)
4783 			{
4784 				for (int cullFlagsIdx = 0; cullFlagsIdx < DE_LENGTH_OF_ARRAY(cullFlags); ++cullFlagsIdx)
4785 				{
4786 					const std::string testName = topType[topTypeIdx].name + "_" + cullFlags[cullFlagsIdx].name;
4787 
4788 					TestParams testParams
4789 					{
4790 						buildTypes[buildTypeIdx].buildType,
4791 						VK_FORMAT_R32G32B32_SFLOAT,
4792 						false,
4793 						indexFormats[indexFormatIdx].indexType,
4794 						BTT_TRIANGLES,
4795 						cullFlags[cullFlagsIdx].cullFlags,
4796 						false,
4797 						false,
4798 						topType[topTypeIdx].topType,
4799 						false,
4800 						false,
4801 						VkBuildAccelerationStructureFlagsKHR(0u),
4802 						OT_NONE,
4803 						OP_NONE,
4804 						RTAS_DEFAULT_SIZE,
4805 						RTAS_DEFAULT_SIZE,
4806 						de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()),
4807 						0u,
4808 						EmptyAccelerationStructureCase::NOT_EMPTY,
4809 						InstanceCustomIndexCase::NONE,
4810 						false,
4811 						0xFFu,
4812 					};
4813 					indexTypeGroup->addChild(new RayTracingASBasicTestCase(ctx, testName.c_str(), "", testParams));
4814 				}
4815 			}
4816 			buildTypeGroup->addChild(indexTypeGroup.release());
4817 		}
4818 		group->addChild(buildTypeGroup.release());
4819 	}
4820 }
4821 
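// Registers the single "dynamic_indexing" test case exercising dynamic indexing of acceleration structures.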
4822 void addDynamicIndexingTests (tcu::TestCaseGroup* group)
4823 {
4824 	auto& ctx = group->getTestContext();
4825 	group->addChild(new RayTracingASDynamicIndexingTestCase(ctx, "dynamic_indexing"));
4826 }
4827 
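// Registers the "empty" tests: single-triangle scenes made effectively empty in different ways (inactive triangles
// or instances, zero geometries or zero primitives when building), per build type and index type.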
4828 void addEmptyAccelerationStructureTests (tcu::TestCaseGroup* group)
4829 {
4830 	const struct
4831 	{
4832 		vk::VkAccelerationStructureBuildTypeKHR				buildType;
4833 		std::string											name;
4834 	} buildTypes[] =
4835 	{
4836 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
4837 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
4838 	};
4839 
4840 	const struct
4841 	{
4842 		VkIndexType								indexType;
4843 		std::string								name;
4844 	} indexFormats[] =
4845 	{
4846 		{ VK_INDEX_TYPE_NONE_KHR,				"index_none"	},
4847 		{ VK_INDEX_TYPE_UINT16,					"index_uint16"	},
4848 		{ VK_INDEX_TYPE_UINT32,					"index_uint32"	},
4849 	};
4850 
4851 	const struct
4852 	{
4853 		EmptyAccelerationStructureCase	emptyASCase;
4854 		std::string						name;
4855 	} emptyCases[] =
4856 	{
4857 		{ EmptyAccelerationStructureCase::INACTIVE_TRIANGLES,	"inactive_triangles"	},
4858 		{ EmptyAccelerationStructureCase::INACTIVE_INSTANCES,	"inactive_instances"	},
4859 		{ EmptyAccelerationStructureCase::NO_GEOMETRIES_BOTTOM,	"no_geometries_bottom"	},
4860 		{ EmptyAccelerationStructureCase::NO_PRIMITIVES_TOP,	"no_primitives_top"		},
4861 		{ EmptyAccelerationStructureCase::NO_PRIMITIVES_BOTTOM,	"no_primitives_bottom"	},
4862 	};
4863 
4864 	auto& ctx = group->getTestContext();
4865 
4866 	for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
4867 	{
4868 		de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str(), ""));
4869 
4870 		for (int indexFormatIdx = 0; indexFormatIdx < DE_LENGTH_OF_ARRAY(indexFormats); ++indexFormatIdx)
4871 		{
4872 			de::MovePtr<tcu::TestCaseGroup> indexTypeGroup(new tcu::TestCaseGroup(ctx, indexFormats[indexFormatIdx].name.c_str(), ""));
4873 
4874 			for (int emptyCaseIdx = 0; emptyCaseIdx < DE_LENGTH_OF_ARRAY(emptyCases); ++emptyCaseIdx)
4875 			{
4876 
4877 				TestParams testParams
4878 				{
4879 					buildTypes[buildTypeIdx].buildType,
4880 					VK_FORMAT_R32G32B32_SFLOAT,
4881 					false,
4882 					indexFormats[indexFormatIdx].indexType,
4883 					BTT_TRIANGLES,
4884 					InstanceCullFlags::NONE,
4885 					false,
4886 					false,
4887 					TTT_IDENTICAL_INSTANCES,
4888 					false,
4889 					false,
4890 					VkBuildAccelerationStructureFlagsKHR(0u),
4891 					OT_NONE,
4892 					OP_NONE,
4893 					RTAS_DEFAULT_SIZE,
4894 					RTAS_DEFAULT_SIZE,
4895 					de::SharedPtr<TestConfiguration>(new SingleTriangleConfiguration()),
4896 					0u,
4897 					emptyCases[emptyCaseIdx].emptyASCase,
4898 					InstanceCustomIndexCase::NONE,
4899 					false,
4900 					0xFFu,
4901 				};
4902 				indexTypeGroup->addChild(new RayTracingASBasicTestCase(ctx, emptyCases[emptyCaseIdx].name.c_str(), "", testParams));
4903 			}
4904 			buildTypeGroup->addChild(indexTypeGroup.release());
4905 		}
4906 		group->addChild(buildTypeGroup.release());
4907 	}
4908 }
4909 
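// Registers the "instance_index" tests: checkerboard scenes that verify the instance custom index from the closest
// hit, any hit or intersection shader (AABBs are used for the intersection variant).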
4910 void addInstanceIndexTests (tcu::TestCaseGroup* group)
4911 {
4912 	const struct
4913 	{
4914 		vk::VkAccelerationStructureBuildTypeKHR				buildType;
4915 		std::string											name;
4916 	} buildTypes[] =
4917 	{
4918 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
4919 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
4920 	};
4921 
4922 	const struct
4923 	{
4924 		InstanceCustomIndexCase						customIndexCase;
4925 		std::string									name;
4926 	} customIndexCases[] =
4927 	{
4928 		{ InstanceCustomIndexCase::NONE,			"no_instance_index"	},
4929 		{ InstanceCustomIndexCase::ANY_HIT,			"ahit"				},
4930 		{ InstanceCustomIndexCase::CLOSEST_HIT,		"chit"				},
4931 		{ InstanceCustomIndexCase::INTERSECTION,	"isec"				},
4932 	};
4933 
4934 	auto& ctx = group->getTestContext();
4935 
4936 	for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
4937 	{
4938 		de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str(), ""));
4939 
4940 		for (int customIndexCaseIdx = 0; customIndexCaseIdx < DE_LENGTH_OF_ARRAY(customIndexCases); ++customIndexCaseIdx)
4941 		{
4942 			const auto&	idxCase				= customIndexCases[customIndexCaseIdx].customIndexCase;
4943 			const auto	bottomGeometryType	= ((idxCase == InstanceCustomIndexCase::INTERSECTION) ? BTT_AABBS : BTT_TRIANGLES);
4944 
4945 			TestParams testParams
4946 			{
4947 				buildTypes[buildTypeIdx].buildType,
4948 				VK_FORMAT_R32G32B32_SFLOAT,
4949 				false,
4950 				VK_INDEX_TYPE_NONE_KHR,
4951 				bottomGeometryType,
4952 				InstanceCullFlags::NONE,
4953 				false,
4954 				false,
4955 				TTT_IDENTICAL_INSTANCES,
4956 				false,
4957 				false,
4958 				VkBuildAccelerationStructureFlagsKHR(0u),
4959 				OT_NONE,
4960 				OP_NONE,
4961 				RTAS_DEFAULT_SIZE,
4962 				RTAS_DEFAULT_SIZE,
4963 				de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()),
4964 				0u,
4965 				EmptyAccelerationStructureCase::NOT_EMPTY,
4966 				customIndexCases[customIndexCaseIdx].customIndexCase,
4967 				false,
4968 				0xFFu,
4969 			};
4970 			buildTypeGroup->addChild(new RayTracingASBasicTestCase(ctx, customIndexCases[customIndexCaseIdx].name.c_str(), "", testParams));
4971 		}
4972 		group->addChild(buildTypeGroup.release());
4973 	}
4974 }
4975 
4976 
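// Registers the "ray_cull_mask" tests for the CullMaskKHR builtin (VK_KHR_ray_tracing_maintenance1): checkerboard
// scenes checking several cull mask bit patterns from the any hit, closest hit or intersection shader.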
4977 void addInstanceRayCullMaskTests(tcu::TestCaseGroup* group)
4978 {
4979 	const struct
4980 	{
4981 		vk::VkAccelerationStructureBuildTypeKHR				buildType;
4982 		std::string											name;
4983 	} buildTypes[] =
4984 	{
4985 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
4986 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
4987 	};
4988 
4989 	const struct
4990 	{
4991 		InstanceCustomIndexCase						customIndexCase;
4992 		std::string									name;
4993 	} customIndexCases[] =
4994 	{
4995 		{ InstanceCustomIndexCase::ANY_HIT,			"ahit"				},
4996 		{ InstanceCustomIndexCase::CLOSEST_HIT,		"chit"				},
4997 		{ InstanceCustomIndexCase::INTERSECTION,	"isec"				},
4998 	};
4999 
5000 	const struct
5001 	{
5002 		uint32_t		cullMask;
5003 		std::string		name;
5004 	} cullMask[] =
5005 	{
5006 		{ 0x000000AAu,	"4_bits"},
5007 		{ 0x00000055u,	"4_bits_reverse"},
5008 		{ 0xAAAAAAAAu,	"16_bits"},
5009 		{ 0x55555555u,	"16_bits_reverse"},
5010 	};
5011 
5012 	auto& ctx = group->getTestContext();
5013 
5014 	for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
5015 	{
5016 		de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str(), ""));
5017 
5018 		for (int customIndexCaseIdx = 0; customIndexCaseIdx < DE_LENGTH_OF_ARRAY(customIndexCases); ++customIndexCaseIdx)
5019 		{
5020 			de::MovePtr<tcu::TestCaseGroup> customIndexCaseGroup(new tcu::TestCaseGroup(ctx, customIndexCases[customIndexCaseIdx].name.c_str(), ""));
5021 
5022 			for (int cullMaskIdx = 0; cullMaskIdx < DE_LENGTH_OF_ARRAY(cullMask); ++cullMaskIdx)
5023 			{
5024 				const auto& idxCase = customIndexCases[customIndexCaseIdx].customIndexCase;
5025 				const auto	bottomGeometryType = ((idxCase == InstanceCustomIndexCase::INTERSECTION) ? BTT_AABBS : BTT_TRIANGLES);
5026 
5027 				TestParams testParams
5028 				{
5029 					buildTypes[buildTypeIdx].buildType,
5030 					VK_FORMAT_R32G32B32_SFLOAT,
5031 					false,
5032 					VK_INDEX_TYPE_NONE_KHR,
5033 					bottomGeometryType,
5034 					InstanceCullFlags::NONE,
5035 					false,
5036 					false,
5037 					TTT_IDENTICAL_INSTANCES,
5038 					false,
5039 					false,
5040 					VkBuildAccelerationStructureFlagsKHR(0u),
5041 					OT_NONE,
5042 					OP_NONE,
5043 					RTAS_DEFAULT_SIZE,
5044 					RTAS_DEFAULT_SIZE,
5045 					de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()),
5046 					0u,
5047 					EmptyAccelerationStructureCase::NOT_EMPTY,
5048 					customIndexCases[customIndexCaseIdx].customIndexCase,
5049 					true,
5050 					cullMask[cullMaskIdx].cullMask,
5051 				};
5052 				customIndexCaseGroup->addChild(new RayTracingASBasicTestCase(ctx, cullMask[cullMaskIdx].name.c_str(), "", testParams));
5053 			}
5054 			buildTypeGroup->addChild(customIndexCaseGroup.release());
5055 		}
5056 		group->addChild(buildTypeGroup.release());
5057 	}
5058 }
5059 
5060 
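// Registers the "device_compability_khr" tests: exercises vkGetDeviceAccelerationStructureCompatibilityKHR for
// top- and bottom-level acceleration structures, per build type.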
5061 void addGetDeviceAccelerationStructureCompabilityTests (tcu::TestCaseGroup* group)
5062 {
5063 	struct
5064 	{
5065 		vk::VkAccelerationStructureBuildTypeKHR				buildType;
5066 		std::string											name;
5067 	}
5068 	const buildTypes[] =
5069 	{
5070 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
5071 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
5072 	};
5073 
5074 	struct
5075 	{
5076 		OperationTarget	target;
5077 		std::string		name;
5078 	}
5079 	const targets[] =
5080 	{
5081 		{ OT_TOP_ACCELERATION,		"top" },
5082 		{ OT_BOTTOM_ACCELERATION,	"bottom" },
5083 	};
5084 
5085 	auto& ctx = group->getTestContext();
5086 
5087 	for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
5088 	{
5089 		de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str(), ""));
5090 
5091 		for (int targetIdx = 0; targetIdx < DE_LENGTH_OF_ARRAY(targets); ++targetIdx)
5092 		{
5093 			TestParams testParams
5094 			{
5095 				buildTypes[buildTypeIdx].buildType,									// buildType		- whether the AS is built on the host (CPU) or the device (GPU)
5096 				VK_FORMAT_R32G32B32_SFLOAT,											// vertexFormat
5097 				false,																// padVertices
5098 				VK_INDEX_TYPE_NONE_KHR,												// indexType
5099 				BTT_TRIANGLES,														// bottomTestType	- what kind of geometry is stored in bottom AS
5100 				InstanceCullFlags::NONE,											// cullFlags		- Flags for instances, if needed.
5101 				false,																// bottomUsesAOP	- whether the bottom AS geometries are provided as arrays of pointers instead of plain arrays
5102 				false,																// bottomGeneric	- Bottom created as generic AS type.
5103 				TTT_IDENTICAL_INSTANCES,											// topTestType		- If instances are identical then bottom geometries must have different vertices/aabbs
5104 				false,																// topUsesAOP		- whether the top AS instances are provided as arrays of pointers instead of plain arrays
5105 				false,																// topGeneric		- Top created as generic AS type.
5106 				VkBuildAccelerationStructureFlagsKHR(0u),							// buildFlags
5107 				targets[targetIdx].target,											// operationTarget
5108 				OP_NONE,															// operationType
5109 				RTAS_DEFAULT_SIZE,													// width
5110 				RTAS_DEFAULT_SIZE,													// height
5111 				de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()),	// testConfiguration
5112 				0u,																	// workerThreadsCount
5113 				EmptyAccelerationStructureCase::NOT_EMPTY,							// emptyASCase
5114 				InstanceCustomIndexCase::NONE,										// instanceCustomIndexCase
5115 				false,																// useCullMask
5116 				0xFFu,																// cullMask
5117 			};
5118 			buildTypeGroup->addChild(new RayTracingDeviceASCompabilityKHRTestCase(ctx, targets[targetIdx].name.c_str(), de::SharedPtr<TestParams>(new TestParams(testParams))));
5119 		}
5120 		group->addChild(buildTypeGroup.release());
5121 	}
5122 }
5123 
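// Registers the "header_bottom_address" tests: builds top-level structures over identical, different or mixed
// bottom-level instances and checks the bottom-level addresses recorded in the serialized header, per build type.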
5124 void addUpdateHeaderBottomAddressTests (tcu::TestCaseGroup* group)
5125 {
5126 	struct
5127 	{
5128 		vk::VkAccelerationStructureBuildTypeKHR		buildType;
5129 		std::string									name;
5130 	}
5131 	const buildTypes[] =
5132 	{
5133 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
5134 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
5135 	};
5136 
5137 	struct
5138 	{
5139 		TopTestType	type;
5140 		std::string	name;
5141 	}
5142 	const instTypes[] =
5143 	{
5144 		{ TTT_IDENTICAL_INSTANCES,	"the_same_instances"		},
5145 		{ TTT_DIFFERENT_INSTANCES,	"different_instances"		},
5146 		{ TTT_MIX_INSTANCES,		"mix_same_diff_instances"	},
5147 	};
5148 
5149 	auto& ctx = group->getTestContext();
5150 
5151 	for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
5152 	{
5153 		de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str(), ""));
5154 
5155 		for (int instTypeIdx = 0; instTypeIdx < DE_LENGTH_OF_ARRAY(instTypes); ++instTypeIdx)
5156 		{
5157 			TestParams testParams
5158 			{
5159 				buildTypes[buildTypeIdx].buildType,									// buildType
5160 				VK_FORMAT_R32G32B32_SFLOAT,											// vertexFormat
5161 				false,																// padVertices
5162 				VK_INDEX_TYPE_NONE_KHR,												// indexType
5163 				BTT_TRIANGLES,														// bottomTestType
5164 				InstanceCullFlags::NONE,											// cullFlags
5165 				false,																// bottomUsesAOP
5166 				false,																// bottomGeneric
5167 				instTypes[instTypeIdx].type,										// topTestType
5168 				false,																// topUsesAOP
5169 				false,																// topGeneric
5170 				VkBuildAccelerationStructureFlagsKHR(0u),							// buildFlags
5171 				OT_TOP_ACCELERATION,												// operationTarget
5172 				OP_NONE,															// operationType
5173 				RTAS_DEFAULT_SIZE,													// width
5174 				RTAS_DEFAULT_SIZE,													// height
5175 				de::SharedPtr<TestConfiguration>(DE_NULL),							// testConfiguration
5176 				0u,																	// workerThreadsCount
5177 				EmptyAccelerationStructureCase::NOT_EMPTY,							// emptyASCase
5178 				InstanceCustomIndexCase::NONE,										// instanceCustomIndexCase
5179 				false,																// useCullMask
5180 				0xFFu,																// cullMask
5181 			};
5182 			buildTypeGroup->addChild(new RayTracingHeaderBottomAddressTestCase(ctx, instTypes[instTypeIdx].name.c_str(), de::SharedPtr<TestParams>(new TestParams(testParams))));
5183 		}
5184 		group->addChild(buildTypeGroup.release());
5185 	}
5186 }
5187 
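// Registers the "query_pool_results" tests (VK_KHR_ray_tracing_maintenance1): queries acceleration structure size
// and bottom-level pointer count, writing results either to host memory or to a VkBuffer, with and without
// compaction enabled, for host and device builds.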
5188 void addQueryPoolResultsTests (TestCaseGroup* group)
5189 {
5190 	std::pair<VkAccelerationStructureBuildTypeKHR, const char*>
5191 	const buildTypes[]
5192 	{
5193 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu"	},
5194 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu"	},
5195 	};
5196 
5197 	std::pair<bool, const char*>
5198 	const storeTypes[]
5199 	{
5200 		{ false,	"memory"	},
5201 		{ true,		"buffer"	}
5202 	};
5203 
5204 	std::pair<QueryPoolResultsParams::Type, const char*>
5205 	const queryTypes[]
5206 	{
5207 		{ QueryPoolResultsParams::Type::StructureSize,	"structure_size"	},
5208 		{ QueryPoolResultsParams::Type::PointerCount,	"pointer_count"		}
5209 	};
5210 
5211 	std::pair<bool, const char*>
5212 		const buildWithCompacted[]
5213 	{
5214 		{ false,	"no_compacted"		},
5215 		{ true,		"enable_compacted"	}
5216 	};
5217 
5218 	auto& testContext = group->getTestContext();
5219 	for (const auto& buildType : buildTypes)
5220 	{
5221 		auto buildTypeGroup = makeMovePtr<TestCaseGroup>(testContext, buildType.second, "");
5222 		for (const auto& compacted : buildWithCompacted)
5223 		{
5224 			auto buildCompactedGroup = makeMovePtr<TestCaseGroup>(testContext, compacted.second, "");
5225 			for (const auto& storeType : storeTypes)
5226 			{
5227 				auto storeTypeGroup = makeMovePtr<TestCaseGroup>(testContext, storeType.second, "");
5228 				for (const auto& queryType : queryTypes)
5229 				{
5230 					QueryPoolResultsParams	p;
5231 					p.buildType = buildType.first;
5232 					p.inVkBuffer = storeType.first;
5233 					p.queryType = queryType.first;
5234 					p.blasCount = 5;
5235 					p.compacted = compacted.first;
5236 
5237 					storeTypeGroup->addChild(new QueryPoolResultsCase(testContext, queryType.second, makeSharedFrom(p)));
5238 				}
5239 				buildCompactedGroup->addChild(storeTypeGroup.release());
5240 			}
5241 			buildTypeGroup->addChild(buildCompactedGroup.release());
5242 		}
5243 		group->addChild(buildTypeGroup.release());
5244 	}
5245 }
5246 
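// Registers the "copy_within_pipeline" tests (VK_KHR_ray_tracing_maintenance1): 16x16 scenes synchronizing an
// acceleration structure copy via the ACCELERATION_STRUCTURE_COPY stage, the ALL_TRANSFER stage or the SBT read
// access bit, for host and device builds.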
5247 void addCopyWithinPipelineTests (TestCaseGroup* group)
5248 {
5249 	std::pair<VkAccelerationStructureBuildTypeKHR, const char*>
5250 	const buildTypes[]
5251 	{
5252 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu"	},
5253 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu"	},
5254 	};
5255 	std::pair<CopyWithinPipelineParams::Type, const char*>
5256 	const testTypes[]
5257 	{
5258 		{ CopyWithinPipelineParams::Type::StageASCopyBit,		"stage_as_copy_bit"  },
5259 		{ CopyWithinPipelineParams::Type::StageAllTransferBit,	"stage_all_transfer" },
5260 		{ CopyWithinPipelineParams::Type::AccessSBTReadBit,		"access_sbt_read"	 }
5261 	};
5262 
5263 	auto& testContext = group->getTestContext();
5264 	for (const auto& buildType : buildTypes)
5265 	{
5266 		auto buildTypeGroup	= makeMovePtr<TestCaseGroup>(testContext, buildType.second, "");
5267 		for (const auto& testType : testTypes)
5268 		{
5269 			CopyWithinPipelineParams	p;
5270 			p.width		= 16;
5271 			p.height	= 16;
5272 			p.build		= buildType.first;
5273 			p.type		= testType.first;
5274 
5275 			buildTypeGroup->addChild(new PipelineStageASCase(testContext, testType.second, makeSharedFrom(p)));
5276 		}
5277 		group->addChild(buildTypeGroup.release());
5278 	}
5279 }
5280 
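// Top-level factory: creates the "acceleration_structures" group and populates it with the sub-groups above.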
5281 tcu::TestCaseGroup*	createAccelerationStructuresTests(tcu::TestContext& testCtx)
5282 {
5283 	de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "acceleration_structures", "Acceleration structure tests"));
5284 
5285 	addTestGroup(group.get(), "flags", "Test building AS with different build types, build flags and geometries/instances using arrays or arrays of pointers", addBasicBuildingTests);
5286 	addTestGroup(group.get(), "format", "Test building AS with different vertex and index formats", addVertexIndexFormatsTests);
5287 	addTestGroup(group.get(), "operations", "Test copying, compaction and serialization of AS", addOperationTests);
5288 	addTestGroup(group.get(), "host_threading", "Test host threading operations", addHostThreadingOperationTests);
5289 	addTestGroup(group.get(), "function_argument", "Test using AS as function argument using both pointers and bare values", addFuncArgTests);
5290 	addTestGroup(group.get(), "instance_triangle_culling", "Test building AS with counterclockwise triangles and/or disabling face culling", addInstanceTriangleCullingTests);
5291 	addTestGroup(group.get(), "ray_cull_mask", "Test for CullMaskKHR builtin as a part of VK_KHR_ray_tracing_maintenance1", addInstanceRayCullMaskTests);
5292 	addTestGroup(group.get(), "dynamic_indexing", "Exercise dynamic indexing of acceleration structures", addDynamicIndexingTests);
5293 	addTestGroup(group.get(), "empty", "Test building empty acceleration structures using different methods", addEmptyAccelerationStructureTests);
5294 	addTestGroup(group.get(), "instance_index", "Test using different values for the instance index and checking them in shaders", addInstanceIndexTests);
5295 	addTestGroup(group.get(), "device_compability_khr", "", addGetDeviceAccelerationStructureCompabilityTests);
5296 	addTestGroup(group.get(), "header_bottom_address", "", addUpdateHeaderBottomAddressTests);
5297 	addTestGroup(group.get(), "query_pool_results", "Test for a new VkQueryPool queries for VK_KHR_ray_tracing_maintenance1", addQueryPoolResultsTests);
5298 	addTestGroup(group.get(), "copy_within_pipeline", "Tests ACCELLERATION_STRUCTURE_COPY and ACCESS_2_SBT_READ with VK_KHR_ray_tracing_maintenance1", addCopyWithinPipelineTests);
5299 
5300 	return group.release();
5301 }
5302 
5303 }	// RayTracing
5304 
5305 }	// vkt
5306