• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2020 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *	  http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Ray Tracing Acceleration Structures tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktRayTracingAccelerationStructuresTests.hpp"
25 
26 #include "vkDefs.hpp"
27 #include "deClock.h"
28 #include "deRandom.h"
29 
30 #include "vktTestCase.hpp"
31 #include "vktTestGroupUtil.hpp"
32 #include "vkCmdUtil.hpp"
33 #include "vkObjUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkBarrierUtil.hpp"
36 #include "vkBufferWithMemory.hpp"
37 #include "vkImageWithMemory.hpp"
38 #include "vkTypeUtil.hpp"
39 #include "vkImageUtil.hpp"
40 #include "vkRayTracingUtil.hpp"
41 #include "tcuVectorUtil.hpp"
42 #include "tcuTexture.hpp"
43 #include "tcuTestLog.hpp"
44 #include "tcuImageCompare.hpp"
45 #include "tcuFloat.hpp"
46 #include "deModularCounter.hpp"
47 
48 #include <cmath>
49 #include <cstddef>
50 #include <set>
51 #include <limits>
52 #include <iostream>
53 
54 namespace vkt
55 {
56 namespace RayTracing
57 {
58 namespace
59 {
60 using namespace vk;
61 using namespace vkt;
62 using namespace tcu;
63 
64 static const VkFlags	ALL_RAY_TRACING_STAGES	= VK_SHADER_STAGE_RAYGEN_BIT_KHR
65 												| VK_SHADER_STAGE_ANY_HIT_BIT_KHR
66 												| VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR
67 												| VK_SHADER_STAGE_MISS_BIT_KHR
68 												| VK_SHADER_STAGE_INTERSECTION_BIT_KHR
69 												| VK_SHADER_STAGE_CALLABLE_BIT_KHR;
70 
71 
// Kind of geometry stored in the bottom level acceleration structures.
enum class BottomTestType
{
	TRIANGLES	= 0,	// Triangle geometry.
	AABBS		= 1,	// Axis-aligned bounding boxes.
};
77 
// How the top level acceleration structure references bottom level structures.
// In the checkerboard configuration DIFFERENT_INSTANCES shares a single bottom level
// structure between instances that carry different transforms; every other value uses
// one bottom level structure per instance together with an identity transform.
enum class TopTestType
{
	IDENTICAL_INSTANCES	= 0,
	DIFFERENT_INSTANCES	= 1,
	UPDATED_INSTANCES	= 2,
	MIX_INSTANCES		= 3,
};
85 
// Acceleration structure level selected for an additional operation (see OperationType).
// NOTE(review): meaning inferred from the enumerator names; the consumers are outside this excerpt.
enum OperationTarget
{
	OT_NONE					= 0,
	OT_TOP_ACCELERATION		= 1,
	OT_BOTTOM_ACCELERATION	= 2
};
92 
// Additional operation exercised on the acceleration structure chosen by OperationTarget.
// NOTE(review): meaning inferred from the enumerator names; the consumers are outside this excerpt.
enum OperationType
{
	OP_NONE				= 0,
	OP_COPY				= 1,
	OP_COMPACT			= 2,
	OP_SERIALIZE		= 3,
	OP_UPDATE			= 4,
	OP_UPDATE_IN_PLACE	= 5
};
102 
// Test-level selection of instance flags; getCullFlags() converts it to VkGeometryInstanceFlagsKHR.
enum class InstanceCullFlags
{
	NONE				= 0,	// No instance flags.
	CULL_DISABLE		= 1,	// Triangle facing cull disable bit only.
	COUNTERCLOCKWISE	= 2,	// Triangle front counterclockwise bit only.
	ALL					= 3,	// Both of the bits above.
};
110 
// Ways in which an acceleration structure can be made empty or inactive for a test.
enum class EmptyAccelerationStructureCase
{
	NOT_EMPTY = 0,
	INACTIVE_TRIANGLES = 1,
	INACTIVE_INSTANCES = 2,
	// geometryCount zero when building.
	NO_GEOMETRIES_BOTTOM = 3,
	// primitiveCount zero when building.
	NO_PRIMITIVES_BOTTOM = 4,
	// primitiveCount zero when building.
	NO_PRIMITIVES_TOP = 5,
};
120 
// Instance custom index test variant. Any value other than NONE makes the checkerboard
// configuration assign INSTANCE_CUSTOM_INDEX_BASE + x + y to its instances; ANY_HIT also
// selects the any-hit shader in place of the closest-hit one.
// NOTE(review): presumably names the shader stage that reads the custom index - confirm against the shaders.
enum class InstanceCustomIndexCase
{
	NONE = 0,
	CLOSEST_HIT = 1,
	ANY_HIT = 2,
	INTERSECTION = 3,
};
128 
// Selects which input is changed in acceleration structure update tests.
// NOTE(review): meaning inferred from the enumerator names; the consumers are outside this excerpt.
enum class UpdateCase
{
	NONE		= 0,
	VERTICES	= 1,
	INDICES		= 2,
	TRANSFORM	= 3
};
136 
// Default size used by the tests in this file (its uses are outside this excerpt).
constexpr deUint32 RTAS_DEFAULT_SIZE = 8u;

// Base value for instance custom indices. Bit 23 is set, i.e. the most significant bit of
// a 24-bit representation, so an accidental sign extension of the custom index would be
// detected.
constexpr deUint32 INSTANCE_CUSTOM_INDEX_BASE = 0x807f00u;
142 
143 struct TestParams;
144 
145 class TestConfiguration
146 {
147 public:
148 	virtual std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	initBottomAccelerationStructures	(Context&							context,
149 																												 TestParams&						testParams) = 0;
150 	virtual de::MovePtr<TopLevelAccelerationStructure>						initTopAccelerationStructure		(Context&							context,
151 																												 TestParams&						testParams,
152 																												 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >&	bottomLevelAccelerationStructures) = 0;
153 	virtual void															initRayTracingShaders				(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
154 																												 Context&							context,
155 																												TestParams&							testParams) = 0;
156 	virtual void															initShaderBindingTables				(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
157 																												 Context&							context,
158 																												 TestParams&						testParams,
159 																												 VkPipeline							pipeline,
160 																												 deUint32							shaderGroupHandleSize,
161 																												 deUint32							shaderGroupBaseAlignment,
162 																												 de::MovePtr<BufferWithMemory>&		raygenShaderBindingTable,
163 																												 de::MovePtr<BufferWithMemory>&		hitShaderBindingTable,
164 																												 de::MovePtr<BufferWithMemory>&		missShaderBindingTable) = 0;
165 	virtual bool															verifyImage							(BufferWithMemory*					resultBuffer,
166 																												 Context&							context,
167 																												 TestParams&						testParams) = 0;
168 	virtual VkFormat														getResultImageFormat				() = 0;
169 	virtual size_t															getResultImageFormatSize			() = 0;
170 	virtual VkClearValue													getClearValue						() = 0;
171 };
172 
173 struct TestParams
174 {
175 	vk::VkAccelerationStructureBuildTypeKHR	buildType;		// are we making AS on CPU or GPU
176 	VkFormat								vertexFormat;
177 	bool									padVertices;
178 	VkIndexType								indexType;
179 	BottomTestType							bottomTestType; // what kind of geometry is stored in bottom AS
180 	InstanceCullFlags						cullFlags;		// Flags for instances, if needed.
181 	bool									bottomUsesAOP;	// does bottom AS use arrays, or arrays of pointers
182 	bool									bottomGeneric;	// Bottom created as generic AS type.
183 	bool									bottomUnboundedCreation; // Bottom created with unbounded buffer memory.
184 	TopTestType								topTestType;	// If instances are identical then bottom geometries must have different vertices/aabbs
185 	bool									topUsesAOP;		// does top AS use arrays, or arrays of pointers
186 	bool									topGeneric;		// Top created as generic AS type.
187 	bool									topUnboundedCreation; // Top created with unbounded buffer memory.
188 	VkBuildAccelerationStructureFlagsKHR	buildFlags;
189 	OperationTarget							operationTarget;
190 	OperationType							operationType;
191 	deUint32								width;
192 	deUint32								height;
193 	de::SharedPtr<TestConfiguration>		testConfiguration;
194 	deUint32								workerThreadsCount;
195 	EmptyAccelerationStructureCase			emptyASCase;
196 	InstanceCustomIndexCase					instanceCustomIndexCase;
197 	bool									useCullMask;
198 	uint32_t								cullMask;
199 	UpdateCase								updateCase;
200 };
201 
getShaderGroupSize(const InstanceInterface & vki,const VkPhysicalDevice physicalDevice)202 deUint32 getShaderGroupSize (const InstanceInterface&	vki,
203 							 const VkPhysicalDevice		physicalDevice)
204 {
205 	de::MovePtr<RayTracingProperties>	rayTracingPropertiesKHR;
206 
207 	rayTracingPropertiesKHR	= makeRayTracingProperties(vki, physicalDevice);
208 	return rayTracingPropertiesKHR->getShaderGroupHandleSize();
209 }
210 
getShaderGroupBaseAlignment(const InstanceInterface & vki,const VkPhysicalDevice physicalDevice)211 deUint32 getShaderGroupBaseAlignment (const InstanceInterface&	vki,
212 									  const VkPhysicalDevice	physicalDevice)
213 {
214 	de::MovePtr<RayTracingProperties>	rayTracingPropertiesKHR;
215 
216 	rayTracingPropertiesKHR = makeRayTracingProperties(vki, physicalDevice);
217 	return rayTracingPropertiesKHR->getShaderGroupBaseAlignment();
218 }
219 
makeImageCreateInfo(deUint32 width,deUint32 height,VkFormat format)220 VkImageCreateInfo makeImageCreateInfo (deUint32 width, deUint32 height, VkFormat format)
221 {
222 	const VkImageCreateInfo			imageCreateInfo			=
223 	{
224 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,																// VkStructureType			sType;
225 		DE_NULL,																							// const void*				pNext;
226 		(VkImageCreateFlags)0u,																				// VkImageCreateFlags		flags;
227 		VK_IMAGE_TYPE_2D,																					// VkImageType				imageType;
228 		format,																								// VkFormat					format;
229 		makeExtent3D(width, height, 1u),																	// VkExtent3D				extent;
230 		1u,																									// deUint32					mipLevels;
231 		1u,																									// deUint32					arrayLayers;
232 		VK_SAMPLE_COUNT_1_BIT,																				// VkSampleCountFlagBits	samples;
233 		VK_IMAGE_TILING_OPTIMAL,																			// VkImageTiling			tiling;
234 		VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT,		// VkImageUsageFlags		usage;
235 		VK_SHARING_MODE_EXCLUSIVE,																			// VkSharingMode			sharingMode;
236 		0u,																									// deUint32					queueFamilyIndexCount;
237 		DE_NULL,																							// const deUint32*			pQueueFamilyIndices;
238 		VK_IMAGE_LAYOUT_UNDEFINED																			// VkImageLayout			initialLayout;
239 	};
240 
241 	return imageCreateInfo;
242 }
243 
makeQueryPool(const DeviceInterface & vk,const VkDevice device,const VkQueryType queryType,deUint32 queryCount)244 Move<VkQueryPool> makeQueryPool(const DeviceInterface&		vk,
245 								const VkDevice				device,
246 								const VkQueryType			queryType,
247 								deUint32					queryCount)
248 {
249 	const VkQueryPoolCreateInfo				queryPoolCreateInfo =
250 	{
251 		VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,		// sType
252 		DE_NULL,										// pNext
253 		(VkQueryPoolCreateFlags)0,						// flags
254 		queryType,										// queryType
255 		queryCount,										// queryCount
256 		0u,												// pipelineStatistics
257 	};
258 	return createQueryPool(vk, device, &queryPoolCreateInfo);
259 }
260 
getCullFlags(InstanceCullFlags flags)261 VkGeometryInstanceFlagsKHR getCullFlags (InstanceCullFlags flags)
262 {
263 	VkGeometryInstanceFlagsKHR cullFlags = 0u;
264 
265 	if (flags == InstanceCullFlags::CULL_DISABLE || flags == InstanceCullFlags::ALL)
266 		cullFlags |= VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR;
267 
268 	if (flags == InstanceCullFlags::COUNTERCLOCKWISE || flags == InstanceCullFlags::ALL)
269 		cullFlags |= VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR;
270 
271 	return cullFlags;
272 }
273 
274 class CheckerboardConfiguration : public TestConfiguration
275 {
276 public:
277 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	initBottomAccelerationStructures	(Context&							context,
278 																										 TestParams&						testParams) override;
279 	de::MovePtr<TopLevelAccelerationStructure>						initTopAccelerationStructure		(Context&							context,
280 																										 TestParams&						testParams,
281 																										 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >&	bottomLevelAccelerationStructures) override;
282 	void															initRayTracingShaders				(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
283 																										 Context&							context,
284 																										 TestParams&						testParams) override;
285 	void															initShaderBindingTables				(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
286 																										 Context&							context,
287 																										 TestParams&						testParams,
288 																										 VkPipeline							pipeline,
289 																										 deUint32							shaderGroupHandleSize,
290 																										 deUint32							shaderGroupBaseAlignment,
291 																										 de::MovePtr<BufferWithMemory>&		raygenShaderBindingTable,
292 																										 de::MovePtr<BufferWithMemory>&		hitShaderBindingTable,
293 																										 de::MovePtr<BufferWithMemory>&		missShaderBindingTable) override;
294 	bool															verifyImage							(BufferWithMemory*					resultBuffer,
295 																										 Context&							context,
296 																										 TestParams&						testParams) override;
297 	VkFormat														getResultImageFormat				() override;
298 	size_t															getResultImageFormatSize			() override;
299 	VkClearValue													getClearValue						() override;
300 };
301 
initBottomAccelerationStructures(Context & context,TestParams & testParams)302 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> > CheckerboardConfiguration::initBottomAccelerationStructures (Context&			context,
303 																														   TestParams&		testParams)
304 {
305 	DE_UNREF(context);
306 
307 	// Cull flags can only be used with triangles.
308 	DE_ASSERT(testParams.cullFlags == InstanceCullFlags::NONE || testParams.bottomTestType == BottomTestType::TRIANGLES);
309 
310 	// Checkerboard configuration does not support empty geometry tests.
311 	DE_ASSERT(testParams.emptyASCase == EmptyAccelerationStructureCase::NOT_EMPTY);
312 
313 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >	result;
314 
315 	const auto instanceFlags = getCullFlags(testParams.cullFlags);
316 
317 	tcu::Vec3 v0(0.0, 1.0, 0.0);
318 	tcu::Vec3 v1(0.0, 0.0, 0.0);
319 	tcu::Vec3 v2(1.0, 1.0, 0.0);
320 	tcu::Vec3 v3(1.0, 0.0, 0.0);
321 
322 	if (testParams.topTestType == TopTestType::DIFFERENT_INSTANCES)
323 	{
324 		de::MovePtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure = makeBottomLevelAccelerationStructure();
325 		bottomLevelAccelerationStructure->setGeometryCount(1u);
326 		de::SharedPtr<RaytracedGeometryBase> geometry;
327 		if (testParams.bottomTestType == BottomTestType::TRIANGLES)
328 		{
329 			geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_TRIANGLES_KHR, testParams.vertexFormat, testParams.indexType, testParams.padVertices);
330 			if (testParams.indexType == VK_INDEX_TYPE_NONE_KHR)
331 			{
332 				if (instanceFlags == 0u)
333 				{
334 					geometry->addVertex(v0);
335 					geometry->addVertex(v1);
336 					geometry->addVertex(v2);
337 					geometry->addVertex(v2);
338 					geometry->addVertex(v1);
339 					geometry->addVertex(v3);
340 				}
341 				else // Counterclockwise so the flags will be needed for the geometry to be visible.
342 				{
343 					geometry->addVertex(v2);
344 					geometry->addVertex(v1);
345 					geometry->addVertex(v0);
346 					geometry->addVertex(v3);
347 					geometry->addVertex(v1);
348 					geometry->addVertex(v2);
349 				}
350 			}
351 			else // m_data.indexType != VK_INDEX_TYPE_NONE_KHR
352 			{
353 				geometry->addVertex(v0);
354 				geometry->addVertex(v1);
355 				geometry->addVertex(v2);
356 				geometry->addVertex(v3);
357 
358 				if (instanceFlags == 0u)
359 				{
360 					geometry->addIndex(0);
361 					geometry->addIndex(1);
362 					geometry->addIndex(2);
363 					geometry->addIndex(2);
364 					geometry->addIndex(1);
365 					geometry->addIndex(3);
366 				}
367 				else // Counterclockwise so the flags will be needed for the geometry to be visible.
368 				{
369 					geometry->addIndex(2);
370 					geometry->addIndex(1);
371 					geometry->addIndex(0);
372 					geometry->addIndex(3);
373 					geometry->addIndex(1);
374 					geometry->addIndex(2);
375 				}
376 			}
377 		}
378 		else // m_data.bottomTestType == BTT_AABBS
379 		{
380 			geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_AABBS_KHR, testParams.vertexFormat, testParams.indexType, testParams.padVertices);
381 
382 			if (!testParams.padVertices)
383 			{
384 				// Single AABB.
385 				geometry->addVertex(tcu::Vec3(0.0f, 0.0f, -0.1f));
386 				geometry->addVertex(tcu::Vec3(1.0f, 1.0f,  0.1f));
387 			}
388 			else
389 			{
390 				// Multiple AABBs covering the same space.
391 				geometry->addVertex(tcu::Vec3(0.0f, 0.0f, -0.1f));
392 				geometry->addVertex(tcu::Vec3(0.5f, 0.5f,  0.1f));
393 
394 				geometry->addVertex(tcu::Vec3(0.5f, 0.5f, -0.1f));
395 				geometry->addVertex(tcu::Vec3(1.0f, 1.0f,  0.1f));
396 
397 				geometry->addVertex(tcu::Vec3(0.0f, 0.5f, -0.1f));
398 				geometry->addVertex(tcu::Vec3(0.5f, 1.0f,  0.1f));
399 
400 				geometry->addVertex(tcu::Vec3(0.5f, 0.0f, -0.1f));
401 				geometry->addVertex(tcu::Vec3(1.0f, 0.5f,  0.1f));
402 			}
403 		}
404 
405 		bottomLevelAccelerationStructure->addGeometry(geometry);
406 
407 		if (testParams.instanceCustomIndexCase == InstanceCustomIndexCase::ANY_HIT)
408 			geometry->setGeometryFlags(VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR);
409 
410 		result.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(bottomLevelAccelerationStructure.release()));
411 	}
412 	else // m_data.topTestType == TTT_IDENTICAL_INSTANCES
413 	{
414 		// triangle and aabb tests use geometries/aabbs with different vertex positions and the same identity matrix in each instance data
415 		for (deUint32 y = 0; y < testParams.height; ++y)
416 		for (deUint32 x = 0; x < testParams.width; ++x)
417 		{
418 			// let's build a chessboard of geometries
419 			if (((x + y) % 2) == 0)
420 				continue;
421 			tcu::Vec3 xyz((float)x, (float)y, 0.0f);
422 
423 			de::MovePtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure = makeBottomLevelAccelerationStructure();
424 			bottomLevelAccelerationStructure->setGeometryCount(1u);
425 
426 			de::SharedPtr<RaytracedGeometryBase> geometry;
427 			if (testParams.bottomTestType == BottomTestType::TRIANGLES)
428 			{
429 				geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_TRIANGLES_KHR, testParams.vertexFormat, testParams.indexType, testParams.padVertices);
430 				if (testParams.indexType == VK_INDEX_TYPE_NONE_KHR)
431 				{
432 					if (instanceFlags == 0u)
433 					{
434 						geometry->addVertex(xyz + v0);
435 						geometry->addVertex(xyz + v1);
436 						geometry->addVertex(xyz + v2);
437 						geometry->addVertex(xyz + v2);
438 						geometry->addVertex(xyz + v1);
439 						geometry->addVertex(xyz + v3);
440 					}
441 					else // Counterclockwise so the flags will be needed for the geometry to be visible.
442 					{
443 						geometry->addVertex(xyz + v2);
444 						geometry->addVertex(xyz + v1);
445 						geometry->addVertex(xyz + v0);
446 						geometry->addVertex(xyz + v3);
447 						geometry->addVertex(xyz + v1);
448 						geometry->addVertex(xyz + v2);
449 					}
450 				}
451 				else
452 				{
453 					geometry->addVertex(xyz + v0);
454 					geometry->addVertex(xyz + v1);
455 					geometry->addVertex(xyz + v2);
456 					geometry->addVertex(xyz + v3);
457 
458 					if (instanceFlags == 0u)
459 					{
460 						geometry->addIndex(0);
461 						geometry->addIndex(1);
462 						geometry->addIndex(2);
463 						geometry->addIndex(2);
464 						geometry->addIndex(1);
465 						geometry->addIndex(3);
466 					}
467 					else // Counterclockwise so the flags will be needed for the geometry to be visible.
468 					{
469 						geometry->addIndex(2);
470 						geometry->addIndex(1);
471 						geometry->addIndex(0);
472 						geometry->addIndex(3);
473 						geometry->addIndex(1);
474 						geometry->addIndex(2);
475 					}
476 				}
477 			}
478 			else // testParams.bottomTestType == BTT_AABBS
479 			{
480 				geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_AABBS_KHR, testParams.vertexFormat, testParams.indexType, testParams.padVertices);
481 
482 				if (!testParams.padVertices)
483 				{
484 					// Single AABB.
485 					geometry->addVertex(xyz + tcu::Vec3(0.0f, 0.0f, -0.1f));
486 					geometry->addVertex(xyz + tcu::Vec3(1.0f, 1.0f,  0.1f));
487 				}
488 				else
489 				{
490 					// Multiple AABBs covering the same space.
491 					geometry->addVertex(xyz + tcu::Vec3(0.0f, 0.0f, -0.1f));
492 					geometry->addVertex(xyz + tcu::Vec3(0.5f, 0.5f,  0.1f));
493 
494 					geometry->addVertex(xyz + tcu::Vec3(0.5f, 0.5f, -0.1f));
495 					geometry->addVertex(xyz + tcu::Vec3(1.0f, 1.0f,  0.1f));
496 
497 					geometry->addVertex(xyz + tcu::Vec3(0.0f, 0.5f, -0.1f));
498 					geometry->addVertex(xyz + tcu::Vec3(0.5f, 1.0f,  0.1f));
499 
500 					geometry->addVertex(xyz + tcu::Vec3(0.5f, 0.0f, -0.1f));
501 					geometry->addVertex(xyz + tcu::Vec3(1.0f, 0.5f,  0.1f));
502 				}
503 			}
504 
505 			bottomLevelAccelerationStructure->addGeometry(geometry);
506 
507 			if (testParams.instanceCustomIndexCase == InstanceCustomIndexCase::ANY_HIT)
508 				geometry->setGeometryFlags(VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR);
509 
510 			result.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(bottomLevelAccelerationStructure.release()));
511 		}
512 	}
513 
514 	return result;
515 }
516 
initTopAccelerationStructure(Context & context,TestParams & testParams,std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> & bottomLevelAccelerationStructures)517 de::MovePtr<TopLevelAccelerationStructure> CheckerboardConfiguration::initTopAccelerationStructure (Context&		context,
518 																									TestParams&		testParams,
519 																									std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >& bottomLevelAccelerationStructures)
520 {
521 	// Checkerboard configuration does not support empty geometry tests.
522 	DE_ASSERT(testParams.emptyASCase == EmptyAccelerationStructureCase::NOT_EMPTY);
523 
524 	DE_UNREF(context);
525 
526 	const auto instanceCount = testParams.width * testParams.height / 2u;
527 	const auto instanceFlags = getCullFlags(testParams.cullFlags);
528 
529 	de::MovePtr<TopLevelAccelerationStructure>	result = makeTopLevelAccelerationStructure();
530 	result->setInstanceCount(instanceCount);
531 
532 	if (testParams.topTestType == TopTestType::DIFFERENT_INSTANCES)
533 	{
534 
535 		for (deUint32 y = 0; y < testParams.height; ++y)
536 		for (deUint32 x = 0; x < testParams.width; ++x)
537 		{
538 			if (((x + y) % 2) == 0)
539 				continue;
540 			const VkTransformMatrixKHR			transformMatrixKHR =
541 			{
542 				{								//  float	matrix[3][4];
543 					{ 1.0f, 0.0f, 0.0f, (float)x },
544 					{ 0.0f, 1.0f, 0.0f, (float)y },
545 					{ 0.0f, 0.0f, 1.0f, 0.0f },
546 				}
547 			};
548 			const deUint32 instanceCustomIndex = ((testParams.instanceCustomIndexCase != InstanceCustomIndexCase::NONE) ? (INSTANCE_CUSTOM_INDEX_BASE + x + y) : 0u);
549 			result->addInstance(bottomLevelAccelerationStructures[0], transformMatrixKHR, instanceCustomIndex, 0xFFu, 0u, instanceFlags);
550 		}
551 	}
552 	else // testParams.topTestType == TTT_IDENTICAL_INSTANCES
553 	{
554 		deUint32 currentInstanceIndex = 0;
555 
556 		for (deUint32 y = 0; y < testParams.height; ++y)
557 		for (deUint32 x = 0; x < testParams.width; ++x)
558 		{
559 			if (((x + y) % 2) == 0)
560 				continue;
561 			const deUint32 instanceCustomIndex = ((testParams.instanceCustomIndexCase != InstanceCustomIndexCase::NONE) ? (INSTANCE_CUSTOM_INDEX_BASE + x + y) : 0u);
562 
563 			if (testParams.useCullMask)
564 			{
565 				result->addInstance(bottomLevelAccelerationStructures[currentInstanceIndex++], identityMatrix3x4, instanceCustomIndex, testParams.cullMask, 0u, instanceFlags);
566 			}
567 			else
568 			{
569 				result->addInstance(bottomLevelAccelerationStructures[currentInstanceIndex++], identityMatrix3x4, instanceCustomIndex, 0xFFu, 0u, instanceFlags);
570 			}
571 		}
572 	}
573 
574 	return result;
575 }
576 
initRayTracingShaders(de::MovePtr<RayTracingPipeline> & rayTracingPipeline,Context & context,TestParams & testParams)577 void CheckerboardConfiguration::initRayTracingShaders(de::MovePtr<RayTracingPipeline>&		rayTracingPipeline,
578 													  Context&								context,
579 													  TestParams&							testParams)
580 {
581 	DE_UNREF(testParams);
582 	const DeviceInterface&						vkd						= context.getDeviceInterface();
583 	const VkDevice								device					= context.getDevice();
584 
585 	const bool useAnyHit		= (testParams.instanceCustomIndexCase == InstanceCustomIndexCase::ANY_HIT);
586 	const auto hitShaderStage	= (useAnyHit ? VK_SHADER_STAGE_ANY_HIT_BIT_KHR : VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR);
587 	const auto hitShaderName	= (useAnyHit ? "ahit" : "chit");
588 
589 	rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,		createShaderModule(vkd, device, context.getBinaryCollection().get("rgen"),  0), 0);
590 	rayTracingPipeline->addShader(hitShaderStage,						createShaderModule(vkd, device, context.getBinaryCollection().get(hitShaderName),  0), 1);
591 	rayTracingPipeline->addShader(hitShaderStage,						createShaderModule(vkd, device, context.getBinaryCollection().get(hitShaderName),  0), 2);
592 	if (testParams.bottomTestType == BottomTestType::AABBS)
593 		rayTracingPipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR,	createShaderModule(vkd, device, context.getBinaryCollection().get("isect"), 0), 2);
594 	rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR,			createShaderModule(vkd, device, context.getBinaryCollection().get("miss"),  0), 3);
595 }
596 
initShaderBindingTables(de::MovePtr<RayTracingPipeline> & rayTracingPipeline,Context & context,TestParams & testParams,VkPipeline pipeline,deUint32 shaderGroupHandleSize,deUint32 shaderGroupBaseAlignment,de::MovePtr<BufferWithMemory> & raygenShaderBindingTable,de::MovePtr<BufferWithMemory> & hitShaderBindingTable,de::MovePtr<BufferWithMemory> & missShaderBindingTable)597 void CheckerboardConfiguration::initShaderBindingTables(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
598 														Context&							context,
599 														TestParams&							testParams,
600 														VkPipeline							pipeline,
601 														deUint32							shaderGroupHandleSize,
602 														deUint32							shaderGroupBaseAlignment,
603 														de::MovePtr<BufferWithMemory>&		raygenShaderBindingTable,
604 														de::MovePtr<BufferWithMemory>&		hitShaderBindingTable,
605 														de::MovePtr<BufferWithMemory>&		missShaderBindingTable)
606 {
607 	const DeviceInterface&						vkd						= context.getDeviceInterface();
608 	const VkDevice								device					= context.getDevice();
609 	Allocator&									allocator				= context.getDefaultAllocator();
610 
611 	raygenShaderBindingTable											= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1 );
612 	if(testParams.bottomTestType == BottomTestType::AABBS)
613 		hitShaderBindingTable											= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 2, 1 );
614 	else // testParams.bottomTestType == BTT_TRIANGLES
615 		hitShaderBindingTable											= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1 );
616 	missShaderBindingTable												= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 3, 1 );
617 }
618 
// Returns num with the order of its 32 bits reversed (bit 0 becomes bit 31 and so on).
deUint32 bitfieldReverse(deUint32 num)
{
	deUint32 reversed = 0u;

	for (deUint32 bit = 0u; bit < 32u; ++bit)
	{
		// Unsigned literals keep the shifts in unsigned arithmetic; shifting a signed 1
		// into bit 31 would overflow int.
		if ((num & (1u << bit)) != 0u)
			reversed |= 1u << (31u - bit);
	}

	return reversed;
}
630 
verifyImage(BufferWithMemory * resultBuffer,Context & context,TestParams & testParams)631 bool CheckerboardConfiguration::verifyImage(BufferWithMemory* resultBuffer, Context& context, TestParams& testParams)
632 {
633 	// Checkerboard configuration does not support empty geometry tests.
634 	DE_ASSERT(testParams.emptyASCase == EmptyAccelerationStructureCase::NOT_EMPTY);
635 
636 	DE_UNREF(context);
637 	const auto*						bufferPtr		= (deInt32*)resultBuffer->getAllocation().getHostPtr();
638 	deUint32						pos				= 0;
639 	deUint32						failures		= 0;
640 
641 	// verify results - each test case should generate checkerboard pattern
642 	for (deUint32 y = 0; y < testParams.height; ++y)
643 	for (deUint32 x = 0; x < testParams.width; ++x)
644 	{
645 		// The hit value should match the shader code.
646 		if (testParams.useCullMask)
647 		{
648 			const deInt32 hitValue			= testParams.cullMask & 0x000000FFu; // only 8 last bits are used by the cullMask
649 			const deInt32 expectedResult	= ((x + y) % 2) ? hitValue : bitfieldReverse(testParams.cullMask &  0x000000FFu);
650 
651 			if (bufferPtr[pos] != expectedResult)
652 				failures++;
653 		}
654 		else
655 		{
656 			const deInt32 hitValue			= ((testParams.instanceCustomIndexCase != InstanceCustomIndexCase::NONE) ? static_cast<deInt32>(INSTANCE_CUSTOM_INDEX_BASE + x + y) : 2);
657 			const deInt32 expectedResult	= ((x + y) % 2) ? hitValue : 1;
658 
659 			if (bufferPtr[pos] != expectedResult)
660 				failures++;
661 		}
662 
663 		++pos;
664 	}
665 	return failures == 0;
666 }
667 
getResultImageFormat()668 VkFormat CheckerboardConfiguration::getResultImageFormat()
669 {
670 	return VK_FORMAT_R32_SINT;
671 }
672 
getResultImageFormatSize()673 size_t CheckerboardConfiguration::getResultImageFormatSize()
674 {
675 	return sizeof(deUint32);
676 }
677 
getClearValue()678 VkClearValue CheckerboardConfiguration::getClearValue()
679 {
680 	return makeClearValueColorU32(0xFF, 0u, 0u, 0u);
681 }
682 
683 class SingleTriangleConfiguration : public TestConfiguration
684 {
685 public:
686 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	initBottomAccelerationStructures	(Context&							context,
687 																										 TestParams&						testParams) override;
688 	de::MovePtr<TopLevelAccelerationStructure>						initTopAccelerationStructure		(Context&							context,
689 																										 TestParams&						testParams,
690 																										 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >&	bottomLevelAccelerationStructures) override;
691 	void															initRayTracingShaders				(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
692 																										 Context&							context,
693 																										 TestParams&						testParams) override;
694 	void															initShaderBindingTables				(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
695 																										 Context&							context,
696 																										 TestParams&						testParams,
697 																										 VkPipeline							pipeline,
698 																										 deUint32							shaderGroupHandleSize,
699 																										 deUint32							shaderGroupBaseAlignment,
700 																										 de::MovePtr<BufferWithMemory>&		raygenShaderBindingTable,
701 																										 de::MovePtr<BufferWithMemory>&		hitShaderBindingTable,
702 																										 de::MovePtr<BufferWithMemory>&		missShaderBindingTable) override;
703 	bool															verifyImage							(BufferWithMemory*					resultBuffer,
704 																										 Context&							context,
705 																										 TestParams&						testParams) override;
706 	VkFormat														getResultImageFormat				() override;
707 	size_t															getResultImageFormatSize			() override;
708 	VkClearValue													getClearValue						() override;
709 
710 	// well, actually we have 2 triangles, but we ignore the first one ( see raygen shader for this configuration )
711 	const std::vector<tcu::Vec3> vertices =
712 	{
713 		tcu::Vec3(0.0f, 0.0f, -0.1f),
714 		tcu::Vec3(-0.1f, 0.0f, 0.0f),
715 		tcu::Vec3(0.0f, -0.1f, 0.0f),
716 		tcu::Vec3(0.0f, 0.0f, 0.0f),
717 		tcu::Vec3(0.5f, 0.0f, -0.5f),
718 		tcu::Vec3(0.0f, 0.5f, -0.5f),
719 	};
720 
721 	const std::vector<deUint32> indices =
722 	{
723 		3,
724 		4,
725 		5
726 	};
727 	// Different vertex configurations of a triangle whose parameter x is set to NaN during inactive_triangles tests
728 	const bool nanConfig[7][3] =
729 	{
730 		{ true,		true,		true	},
731 		{ true,		false,		false	},
732 		{ false,	true,		false	},
733 		{ false,	false,		true	},
734 		{ true,		true,		false	},
735 		{ false,	true,		true	},
736 		{ true,		false,		true	},
737 	};
738 };
739 
initBottomAccelerationStructures(Context & context,TestParams & testParams)740 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> > SingleTriangleConfiguration::initBottomAccelerationStructures (Context&			context,
741 																															 TestParams&		testParams)
742 {
743 	DE_UNREF(context);
744 
745 	// No other cases supported for the single triangle configuration.
746 	DE_ASSERT(testParams.instanceCustomIndexCase == InstanceCustomIndexCase::NONE);
747 
748 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >	result;
749 
750 	de::MovePtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure = makeBottomLevelAccelerationStructure();
751 
752 	unsigned int geometryCount = testParams.emptyASCase == EmptyAccelerationStructureCase::INACTIVE_TRIANGLES ? 4U : 1U;
753 
754 	if (testParams.emptyASCase == EmptyAccelerationStructureCase::INACTIVE_TRIANGLES)
755 	{
756 		bottomLevelAccelerationStructure->setGeometryCount(geometryCount);
757 
758 		de::SharedPtr<RaytracedGeometryBase> geometry;
759 		geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_TRIANGLES_KHR, testParams.vertexFormat, testParams.indexType);
760 
761 		for (unsigned int i = 0; i < geometryCount; i++)
762 		{
763 			auto customVertices(vertices);
764 
765 			const auto nanValue = tcu::Float32::nan().asFloat();
766 
767 			if (nanConfig[i][0])
768 				customVertices[3].x() = nanValue;
769 			if (nanConfig[i][1])
770 				customVertices[4].x() = nanValue;
771 			if (nanConfig[i][2])
772 				customVertices[5].x() = nanValue;
773 
774 			for (auto it = begin(customVertices), eit = end(customVertices); it != eit; ++it)
775 				geometry->addVertex(*it);
776 
777 			if (testParams.indexType != VK_INDEX_TYPE_NONE_KHR)
778 			{
779 				for (auto it = begin(indices), eit = end(indices); it != eit; ++it)
780 					geometry->addIndex(*it);
781 			}
782 			bottomLevelAccelerationStructure->addGeometry(geometry);
783 		}
784 	}
785 	else
786 	{
787 		bottomLevelAccelerationStructure->setGeometryCount(geometryCount);
788 
789 		de::SharedPtr<RaytracedGeometryBase> geometry;
790 		geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_TRIANGLES_KHR, testParams.vertexFormat, testParams.indexType);
791 
792 		for (auto it = begin(vertices), eit = end(vertices); it != eit; ++it)
793 			geometry->addVertex(*it);
794 
795 		if (testParams.indexType != VK_INDEX_TYPE_NONE_KHR)
796 		{
797 			for (auto it = begin(indices), eit = end(indices); it != eit; ++it)
798 				geometry->addIndex(*it);
799 		}
800 		bottomLevelAccelerationStructure->addGeometry(geometry);
801 	}
802 
803 	result.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(bottomLevelAccelerationStructure.release()));
804 
805 	return result;
806 }
807 
initTopAccelerationStructure(Context & context,TestParams & testParams,std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> & bottomLevelAccelerationStructures)808 de::MovePtr<TopLevelAccelerationStructure> SingleTriangleConfiguration::initTopAccelerationStructure (Context&			context,
809 																									  TestParams&		testParams,
810 																									  std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >& bottomLevelAccelerationStructures)
811 {
812 	DE_UNREF(context);
813 	DE_UNREF(testParams);
814 
815 	// Unsupported in this configuration.
816 	DE_ASSERT(testParams.instanceCustomIndexCase == InstanceCustomIndexCase::NONE);
817 
818 	de::MovePtr<TopLevelAccelerationStructure>	result = makeTopLevelAccelerationStructure();
819 	result->setInstanceCount(1u);
820 
821 	result->addInstance(bottomLevelAccelerationStructures[0]);
822 
823 	return result;
824 }
825 
initRayTracingShaders(de::MovePtr<RayTracingPipeline> & rayTracingPipeline,Context & context,TestParams & testParams)826 void SingleTriangleConfiguration::initRayTracingShaders(de::MovePtr<RayTracingPipeline>&		rayTracingPipeline,
827 														Context&								context,
828 														TestParams&								testParams)
829 {
830 	DE_UNREF(testParams);
831 	const DeviceInterface&						vkd						= context.getDeviceInterface();
832 	const VkDevice								device					= context.getDevice();
833 
834 	rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,		createShaderModule(vkd, device, context.getBinaryCollection().get("rgen_depth"),  0), 0);
835 	rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,	createShaderModule(vkd, device, context.getBinaryCollection().get("chit_depth"),  0), 1);
836 	rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR,			createShaderModule(vkd, device, context.getBinaryCollection().get("miss_depth"),  0), 2);
837 }
838 
initShaderBindingTables(de::MovePtr<RayTracingPipeline> & rayTracingPipeline,Context & context,TestParams & testParams,VkPipeline pipeline,deUint32 shaderGroupHandleSize,deUint32 shaderGroupBaseAlignment,de::MovePtr<BufferWithMemory> & raygenShaderBindingTable,de::MovePtr<BufferWithMemory> & hitShaderBindingTable,de::MovePtr<BufferWithMemory> & missShaderBindingTable)839 void SingleTriangleConfiguration::initShaderBindingTables(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
840 														  Context&							context,
841 														  TestParams&						testParams,
842 														  VkPipeline						pipeline,
843 														  deUint32							shaderGroupHandleSize,
844 														  deUint32							shaderGroupBaseAlignment,
845 														  de::MovePtr<BufferWithMemory>&	raygenShaderBindingTable,
846 														  de::MovePtr<BufferWithMemory>&	hitShaderBindingTable,
847 														  de::MovePtr<BufferWithMemory>&	missShaderBindingTable)
848 {
849 	DE_UNREF(testParams);
850 	const DeviceInterface&						vkd						= context.getDeviceInterface();
851 	const VkDevice								device					= context.getDevice();
852 	Allocator&									allocator				= context.getDefaultAllocator();
853 
854 	raygenShaderBindingTable											= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1 );
855 	hitShaderBindingTable												= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1 );
856 	missShaderBindingTable												= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 2, 1 );
857 }
858 
pointInTriangle2D(const tcu::Vec3 & p,const tcu::Vec3 & p0,const tcu::Vec3 & p1,const tcu::Vec3 & p2)859 bool pointInTriangle2D(const tcu::Vec3& p, const tcu::Vec3& p0, const tcu::Vec3& p1, const tcu::Vec3& p2)
860 {
861 	float s = p0.y() * p2.x() - p0.x() * p2.y() + (p2.y() - p0.y()) * p.x() + (p0.x() - p2.x()) * p.y();
862 	float t = p0.x() * p1.y() - p0.y() * p1.x() + (p0.y() - p1.y()) * p.x() + (p1.x() - p0.x()) * p.y();
863 
864 	if ((s < 0) != (t < 0))
865 		return false;
866 
867 	float a = -p1.y() * p2.x() + p0.y() * (p2.x() - p1.x()) + p0.x() * (p1.y() - p2.y()) + p1.x() * p2.y();
868 
869 	return a < 0 ?
870 		(s <= 0 && s + t >= a) :
871 		(s >= 0 && s + t <= a);
872 }
873 
verifyImage(BufferWithMemory * resultBuffer,Context & context,TestParams & testParams)874 bool SingleTriangleConfiguration::verifyImage(BufferWithMemory* resultBuffer, Context& context, TestParams& testParams)
875 {
876 	tcu::TextureFormat			imageFormat		= vk::mapVkFormat(getResultImageFormat());
877 	tcu::TextureFormat			vertexFormat	= vk::mapVkFormat(testParams.vertexFormat);
878 	tcu::ConstPixelBufferAccess	resultAccess	(imageFormat, testParams.width, testParams.height, 1, resultBuffer->getAllocation().getHostPtr());
879 
880 	std::vector<float>			reference		(testParams.width * testParams.height);
881 	tcu::PixelBufferAccess		referenceAccess	(imageFormat, testParams.width, testParams.height, 1, reference.data());
882 
883 	// verify results
884 	tcu::Vec3					v0				= vertices[3];
885 	tcu::Vec3					v1				= vertices[4];
886 	tcu::Vec3					v2				= vertices[5];
887 	const int					numChannels		= tcu::getNumUsedChannels(vertexFormat.order);
888 	if (numChannels < 3)
889 	{
890 		v0.z() = 0.0f;
891 		v1.z() = 0.0f;
892 		v2.z() = 0.0f;
893 	}
894 	tcu::Vec3					abc				= tcu::cross((v2 - v0), (v1 - v0));
895 
896 	for (deUint32 j = 0; j < testParams.height; ++j)
897 	{
898 		float y = 0.1f + 0.2f * float(j) / float(testParams.height - 1);
899 		for (deUint32 i = 0; i < testParams.width; ++i)
900 		{
901 			float	x			= 0.1f + 0.2f * float(i) / float(testParams.width - 1);
902 			float	z			= (abc.x()*x + abc.y()*y) / abc.z();
903 			bool	inTriangle	= pointInTriangle2D(tcu::Vec3(x, y, z), v0, v1, v2);
904 			float	refValue	= ((inTriangle && testParams.emptyASCase == EmptyAccelerationStructureCase::NOT_EMPTY) ? 1.0f+z : 0.0f);
905 			referenceAccess.setPixel(tcu::Vec4(refValue, 0.0f, 0.0f, 1.0f), i, j);
906 		}
907 	}
908 	return tcu::floatThresholdCompare(context.getTestContext().getLog(), "Result comparison", "", referenceAccess, resultAccess, tcu::Vec4(0.01f), tcu::COMPARE_LOG_EVERYTHING);
909 }
910 
getResultImageFormat()911 VkFormat SingleTriangleConfiguration::getResultImageFormat()
912 {
913 	return VK_FORMAT_R32_SFLOAT;
914 }
915 
getResultImageFormatSize()916 size_t SingleTriangleConfiguration::getResultImageFormatSize()
917 {
918 	return sizeof(float);
919 }
920 
getClearValue()921 VkClearValue SingleTriangleConfiguration::getClearValue()
922 {
923 	return makeClearValueColorF32(32.0f, 0.0f, 0.0f, 0.0f);
924 }
925 
926 class UpdateableASConfiguration : public TestConfiguration
927 {
928 public:
929 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	initBottomAccelerationStructures	(Context&							context,
930 																										 TestParams&						testParams) override;
931 	de::MovePtr<TopLevelAccelerationStructure>						initTopAccelerationStructure		(Context&							context,
932 																										 TestParams&						testParams,
933 																										 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >&	bottomLevelAccelerationStructures) override;
934 	void															initRayTracingShaders				(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
935 																										 Context&							context,
936 																										 TestParams&						testParams) override;
937 	void															initShaderBindingTables				(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
938 																										 Context&							context,
939 																										 TestParams&						testParams,
940 																										 VkPipeline							pipeline,
941 																										 deUint32							shaderGroupHandleSize,
942 																										 deUint32							shaderGroupBaseAlignment,
943 																										 de::MovePtr<BufferWithMemory>&		raygenShaderBindingTable,
944 																										 de::MovePtr<BufferWithMemory>&		hitShaderBindingTable,
945 																										 de::MovePtr<BufferWithMemory>&		missShaderBindingTable) override;
946 	bool															verifyImage							(BufferWithMemory*					resultBuffer,
947 																										 Context&							context,
948 																										 TestParams&						testParams) override;
949 	VkFormat														getResultImageFormat				() override;
950 	size_t															getResultImageFormatSize			() override;
951 	VkClearValue													getClearValue						() override;
952 
953 	// two triangles: one in the front we will replace with one in the back after updating
954 	// update vertex: build with vertices[0], update vertices with vertices[1]
955 	// update index: build with vertices[0], updade indices with indices[1]
956 	const std::vector<tcu::Vec3> vertices =
957 	{
958 		tcu::Vec3(0.0f, 0.0f, 0.0f),
959 		tcu::Vec3(0.5f, 0.0f, 0.0f),
960 		tcu::Vec3(0.0f, 0.5f, 0.0f),
961 		tcu::Vec3(0.0f, 0.0f, -0.5f),
962 		tcu::Vec3(0.5f, 0.0f, -0.5f),
963 		tcu::Vec3(0.0f, 0.5f, -0.5f),
964 	};
965 
966 	const std::vector<deUint32> indices =
967 	{
968 		0,
969 		1,
970 		2
971 	};
972 };
973 
initBottomAccelerationStructures(Context & context,TestParams & testParams)974 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> > UpdateableASConfiguration::initBottomAccelerationStructures (Context&			context,
975 																														 TestParams&		testParams)
976 {
977 	DE_UNREF(context);
978 
979 	// No other cases supported for the single triangle configuration.
980 	DE_ASSERT(testParams.instanceCustomIndexCase == InstanceCustomIndexCase::NONE);
981 
982 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >	result;
983 
984 	{
985 		de::MovePtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure = makeBottomLevelAccelerationStructure();
986 
987 		unsigned int geometryCount = 1U;
988 
989 		bottomLevelAccelerationStructure->setGeometryCount(geometryCount);
990 
991 		de::SharedPtr<RaytracedGeometryBase> geometry;
992 		geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_TRIANGLES_KHR, testParams.vertexFormat, testParams.indexType);
993 
994 		for (auto it = begin(vertices), eit = end(vertices); it != eit; ++it)
995 			geometry->addVertex(*it);
996 
997 		if (testParams.indexType != VK_INDEX_TYPE_NONE_KHR)
998 		{
999 			for (auto it = begin(indices), eit = end(indices); it != eit; ++it)
1000 				geometry->addIndex(*it);
1001 		}
1002 		bottomLevelAccelerationStructure->addGeometry(geometry);
1003 
1004 		result.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(bottomLevelAccelerationStructure.release()));
1005 	}
1006 	return result;
1007 }
1008 
initTopAccelerationStructure(Context & context,TestParams & testParams,std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> & bottomLevelAccelerationStructures)1009 de::MovePtr<TopLevelAccelerationStructure> UpdateableASConfiguration::initTopAccelerationStructure (Context&			context,
1010 																									  TestParams&		testParams,
1011 																									  std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >& bottomLevelAccelerationStructures)
1012 {
1013 	DE_UNREF(context);
1014 	DE_UNREF(testParams);
1015 
1016 	// Unsupported in this configuration.
1017 	DE_ASSERT(testParams.instanceCustomIndexCase == InstanceCustomIndexCase::NONE);
1018 
1019 	de::MovePtr<TopLevelAccelerationStructure>	result = makeTopLevelAccelerationStructure();
1020 	result->setInstanceCount(1u);
1021 
1022 	result->addInstance(bottomLevelAccelerationStructures[0]);
1023 
1024 	return result;
1025 }
1026 
initRayTracingShaders(de::MovePtr<RayTracingPipeline> & rayTracingPipeline,Context & context,TestParams & testParams)1027 void UpdateableASConfiguration::initRayTracingShaders(de::MovePtr<RayTracingPipeline>&		rayTracingPipeline,
1028 														Context&								context,
1029 														TestParams&								testParams)
1030 {
1031 	DE_UNREF(testParams);
1032 	const DeviceInterface&						vkd						= context.getDeviceInterface();
1033 	const VkDevice								device					= context.getDevice();
1034 
1035 	rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,		createShaderModule(vkd, device, context.getBinaryCollection().get("rgen_depth"),  0), 0);
1036 	rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,	createShaderModule(vkd, device, context.getBinaryCollection().get("chit_depth"),  0), 1);
1037 	rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR,			createShaderModule(vkd, device, context.getBinaryCollection().get("miss_depth"),  0), 2);
1038 }
1039 
initShaderBindingTables(de::MovePtr<RayTracingPipeline> & rayTracingPipeline,Context & context,TestParams & testParams,VkPipeline pipeline,deUint32 shaderGroupHandleSize,deUint32 shaderGroupBaseAlignment,de::MovePtr<BufferWithMemory> & raygenShaderBindingTable,de::MovePtr<BufferWithMemory> & hitShaderBindingTable,de::MovePtr<BufferWithMemory> & missShaderBindingTable)1040 void UpdateableASConfiguration::initShaderBindingTables(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
1041 														  Context&							context,
1042 														  TestParams&						testParams,
1043 														  VkPipeline						pipeline,
1044 														  deUint32							shaderGroupHandleSize,
1045 														  deUint32							shaderGroupBaseAlignment,
1046 														  de::MovePtr<BufferWithMemory>&	raygenShaderBindingTable,
1047 														  de::MovePtr<BufferWithMemory>&	hitShaderBindingTable,
1048 														  de::MovePtr<BufferWithMemory>&	missShaderBindingTable)
1049 {
1050 	DE_UNREF(testParams);
1051 	const DeviceInterface&						vkd						= context.getDeviceInterface();
1052 	const VkDevice								device					= context.getDevice();
1053 	Allocator&									allocator				= context.getDefaultAllocator();
1054 
1055 	raygenShaderBindingTable											= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1 );
1056 	hitShaderBindingTable												= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1 );
1057 	missShaderBindingTable												= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 2, 1 );
1058 }
1059 
verifyImage(BufferWithMemory * resultBuffer,Context & context,TestParams & testParams)1060 bool UpdateableASConfiguration::verifyImage(BufferWithMemory* resultBuffer, Context& context, TestParams& testParams)
1061 {
1062 	tcu::TextureFormat			imageFormat		= vk::mapVkFormat(getResultImageFormat());
1063 	tcu::TextureFormat			vertexFormat	= vk::mapVkFormat(testParams.vertexFormat);
1064 	tcu::ConstPixelBufferAccess	resultAccess	(imageFormat, testParams.width, testParams.height, 1, resultBuffer->getAllocation().getHostPtr());
1065 
1066 	std::vector<float>			reference		(testParams.width * testParams.height);
1067 	tcu::PixelBufferAccess		referenceAccess	(imageFormat, testParams.width, testParams.height, 1, reference.data());
1068 
1069 	// verify results
1070 	tcu::Vec3					v0				= vertices[3];
1071 	tcu::Vec3					v1				= vertices[4];
1072 	tcu::Vec3					v2				= vertices[5];
1073 	const int					numChannels		= tcu::getNumUsedChannels(vertexFormat.order);
1074 	if (numChannels < 3)
1075 	{
1076 		v0.z() = 0.0f;
1077 		v1.z() = 0.0f;
1078 		v2.z() = 0.0f;
1079 	}
1080 
1081 	for (deUint32 j = 0; j < testParams.height; ++j)
1082 	{
1083 		float y = 0.1f + 0.2f * float(j) / float(testParams.height - 1);
1084 		for (deUint32 i = 0; i < testParams.width; ++i)
1085 		{
1086 			float	x			= 0.1f + 0.2f * float(i) / float(testParams.width - 1);
1087 			float	z			= v0.z();
1088 			bool	inTriangle	= pointInTriangle2D(tcu::Vec3(x, y, z), v0, v1, v2);
1089 			float	refValue	= ((inTriangle && testParams.emptyASCase == EmptyAccelerationStructureCase::NOT_EMPTY) ? 1.0f-z : 0.0f);
1090 			referenceAccess.setPixel(tcu::Vec4(refValue, 0.0f, 0.0f, 1.0f), i, j);
1091 		}
1092 	}
1093 	return tcu::floatThresholdCompare(context.getTestContext().getLog(), "Result comparison", "", referenceAccess, resultAccess, tcu::Vec4(0.01f), tcu::COMPARE_LOG_EVERYTHING);
1094 }
1095 
getResultImageFormat()1096 VkFormat UpdateableASConfiguration::getResultImageFormat()
1097 {
1098 	return VK_FORMAT_R32_SFLOAT;
1099 }
1100 
getResultImageFormatSize()1101 size_t UpdateableASConfiguration::getResultImageFormatSize()
1102 {
1103 	return sizeof(float);
1104 }
1105 
getClearValue()1106 VkClearValue UpdateableASConfiguration::getClearValue()
1107 {
1108 	return makeClearValueColorF32(32.0f, 0.0f, 0.0f, 0.0f);
1109 }
1110 
commonASTestsCheckSupport(Context & context)1111 void commonASTestsCheckSupport(Context& context)
1112 {
1113 	context.requireInstanceFunctionality("VK_KHR_get_physical_device_properties2");
1114 	context.requireDeviceFunctionality("VK_KHR_acceleration_structure");
1115 	context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
1116 
1117 	const VkPhysicalDeviceRayTracingPipelineFeaturesKHR&	rayTracingPipelineFeaturesKHR = context.getRayTracingPipelineFeatures();
1118 	if (rayTracingPipelineFeaturesKHR.rayTracingPipeline == DE_FALSE)
1119 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceRayTracingPipelineFeaturesKHR.rayTracingPipeline");
1120 
1121 	const VkPhysicalDeviceAccelerationStructureFeaturesKHR&	accelerationStructureFeaturesKHR = context.getAccelerationStructureFeatures();
1122 	if (accelerationStructureFeaturesKHR.accelerationStructure == DE_FALSE)
1123 		TCU_THROW(TestError, "VK_KHR_ray_tracing_pipeline requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructure");
1124 }
1125 
1126 class RayTracingASBasicTestCase : public TestCase
1127 {
1128 public:
1129 																	RayTracingASBasicTestCase			(tcu::TestContext& context, const char* name, const TestParams& data);
1130 																	~RayTracingASBasicTestCase			(void);
1131 
1132 	void															checkSupport						(Context& context) const override;
1133 	void															initPrograms						(SourceCollections& programCollection) const override;
1134 	TestInstance*													createInstance						(Context& context) const override;
1135 protected:
1136 	TestParams														m_data;
1137 };
1138 
1139 // Same as RayTracingASBasicTestCase but it will only initialize programs for SingleTriangleConfiguration and use hand-tuned SPIR-V
1140 // assembly.
1141 class RayTracingASFuncArgTestCase : public RayTracingASBasicTestCase
1142 {
1143 public:
1144 																	RayTracingASFuncArgTestCase			(tcu::TestContext& context, const char* name, const TestParams& data);
~RayTracingASFuncArgTestCase(void)1145 																	~RayTracingASFuncArgTestCase		(void) {}
1146 
1147 	void															initPrograms						(SourceCollections& programCollection) const override;
1148 };
1149 
1150 class RayTracingASBasicTestInstance : public TestInstance
1151 {
1152 public:
1153 																	RayTracingASBasicTestInstance		(Context& context, const TestParams& data);
1154 																	~RayTracingASBasicTestInstance		(void) = default;
1155 	tcu::TestStatus													iterate								(void) override;
1156 
1157 protected:
1158 	bool															iterateNoWorkers					(void);
1159 	bool															iterateWithWorkers					(void);
1160 	de::MovePtr<BufferWithMemory>									runTest								(const deUint32 workerThreadsCount);
1161 private:
1162 	TestParams														m_data;
1163 };
1164 
RayTracingASBasicTestCase(tcu::TestContext & context,const char * name,const TestParams & data)1165 RayTracingASBasicTestCase::RayTracingASBasicTestCase (tcu::TestContext& context, const char* name, const TestParams& data)
1166 	: vkt::TestCase	(context, name)
1167 	, m_data		(data)
1168 {
1169 }
1170 
~RayTracingASBasicTestCase(void)1171 RayTracingASBasicTestCase::~RayTracingASBasicTestCase	(void)
1172 {
1173 }
1174 
checkSupport(Context & context) const1175 void RayTracingASBasicTestCase::checkSupport(Context& context) const
1176 {
1177 	commonASTestsCheckSupport(context);
1178 
1179 	const VkPhysicalDeviceAccelerationStructureFeaturesKHR&	accelerationStructureFeaturesKHR = context.getAccelerationStructureFeatures();
1180 	if (m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR && accelerationStructureFeaturesKHR.accelerationStructureHostCommands == DE_FALSE)
1181 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructureHostCommands");
1182 
1183 	if (m_data.useCullMask)
1184 		context.requireDeviceFunctionality("VK_KHR_ray_tracing_maintenance1");
1185 
1186 	// Check supported vertex format.
1187 	checkAccelerationStructureVertexBufferFormat(context.getInstanceInterface(), context.getPhysicalDevice(), m_data.vertexFormat);
1188 }
1189 
initPrograms(SourceCollections & programCollection) const1190 void RayTracingASBasicTestCase::initPrograms (SourceCollections& programCollection) const
1191 {
1192 	bool storeInRGen = false;
1193 	bool storeInAHit = false;
1194 	bool storeInCHit = false;
1195 	bool storeInISec = false;
1196 
1197 	switch (m_data.instanceCustomIndexCase)
1198 	{
1199 	case InstanceCustomIndexCase::NONE:			storeInRGen = true;	break;
1200 	case InstanceCustomIndexCase::CLOSEST_HIT:	storeInCHit = true; break;
1201 	case InstanceCustomIndexCase::ANY_HIT:		storeInAHit = true;	break;
1202 	case InstanceCustomIndexCase::INTERSECTION:	storeInISec = true; break;
1203 	default: DE_ASSERT(false); break;
1204 	}
1205 
1206 
1207 	const std::string				imageDeclaration	= "layout(r32i, set = 0, binding = 0) uniform iimage2D result;\n";
1208 	const std::string				storeCustomIndex	= "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), ivec4(gl_InstanceCustomIndexEXT, 0, 0, 1));\n";
1209 	const std::string				storeCullMask		= "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), ivec4(gl_CullMaskEXT, 0, 0, 1));\n";
1210 	const vk::ShaderBuildOptions	buildOptions		(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
1211 
1212 	{
1213 		std::stringstream css;
1214 		css
1215 			<< "#version 460 core\n"
1216 			<< "#extension GL_EXT_ray_tracing : require\n"
1217 			<< "layout(location = 0) rayPayloadEXT ivec4 hitValue;\n";
1218 
1219 		if (storeInRGen)
1220 			css << imageDeclaration;
1221 
1222 		css
1223 			<< "layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n"
1224 			<< "\n"
1225 			<< "void main()\n"
1226 			<< "{\n"
1227 			<< "  float tmin      = 0.0;\n"
1228 			<< "  float tmax      = 1.0;\n"
1229 			<< "  vec3  origin    = vec3(float(gl_LaunchIDEXT.x) + 0.5f, float(gl_LaunchIDEXT.y) + 0.5f, 0.5);\n"
1230 			<< "  vec3  direction = vec3(0.0,0.0,-1.0);\n"
1231 			<< "  hitValue        = ivec4(0,0,0,0);\n"
1232 			<< "  traceRayEXT(topLevelAS, " << ((m_data.cullFlags == InstanceCullFlags::NONE) ? "0, " : "gl_RayFlagsCullBackFacingTrianglesEXT, ") << m_data.cullMask << ", 0, 0, 0, origin, tmin, direction, tmax, 0);\n";
1233 
1234 		if (storeInRGen)
1235 			css << "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), hitValue);\n";
1236 
1237 		css << "}\n";
1238 
1239 		programCollection.glslSources.add("rgen") << glu::RaygenSource(updateRayTracingGLSL(css.str())) << buildOptions;
1240 	}
1241 
1242 	{
1243 		std::stringstream css;
1244 		css
1245 			<< "#version 460 core\n"
1246 			<< "#extension GL_EXT_ray_tracing : require\n"
1247 			<< ((m_data.useCullMask) ? "#extension GL_EXT_ray_cull_mask : require\n" : "\n")
1248 			<< "layout(location = 0) rayPayloadInEXT ivec4 hitValue;\n";
1249 
1250 		if (storeInCHit)
1251 			css << imageDeclaration;
1252 
1253 		css
1254 			<< "void main()\n"
1255 			<< "{\n"
1256 			<< "  hitValue = ivec4(2,0,0,1);\n";
1257 
1258 		if (storeInCHit)
1259 		{
1260 			if (m_data.useCullMask)
1261 			{
1262 				css << storeCullMask;
1263 			}
1264 			else
1265 			{
1266 				css << storeCustomIndex;
1267 			}
1268 		}
1269 
1270 		css << "}\n";
1271 
1272 		programCollection.glslSources.add("chit") << glu::ClosestHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
1273 	}
1274 
1275 	if (storeInAHit)
1276 	{
1277 		std::stringstream css;
1278 		css
1279 			<< "#version 460 core\n"
1280 			<< "#extension GL_EXT_ray_tracing : require\n"
1281 			<< ((m_data.useCullMask) ? "#extension GL_EXT_ray_cull_mask : require\n" : "\n")
1282 			<< imageDeclaration
1283 			<< "void main()\n"
1284 			<< "{\n"
1285 			<< ((m_data.useCullMask) ? storeCullMask : storeCustomIndex)
1286 			<< "}\n";
1287 
1288 		programCollection.glslSources.add("ahit") << glu::AnyHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
1289 	}
1290 
1291 	{
1292 		std::stringstream css;
1293 		css
1294 			<< "#version 460 core\n"
1295 			<< "#extension GL_EXT_ray_tracing : require\n"
1296 			<< ((m_data.useCullMask) ? "#extension GL_EXT_ray_cull_mask : require\n" : "\n")
1297 			<< "hitAttributeEXT ivec4 hitAttribute;\n";
1298 
1299 		if (storeInISec)
1300 			css << imageDeclaration;
1301 
1302 		css
1303 			<< "void main()\n"
1304 			<< "{\n"
1305 			<< "  hitAttribute = ivec4(0,0,0,0);\n"
1306 			<< "  reportIntersectionEXT(0.5f, 0);\n";
1307 		if (storeInISec)
1308 		{
1309 			if (m_data.useCullMask)
1310 			{
1311 				css << storeCullMask;
1312 			}
1313 			else
1314 			{
1315 				css << storeCustomIndex;
1316 			}
1317 		}
1318 
1319 		css << "}\n";
1320 
1321 		programCollection.glslSources.add("isect") << glu::IntersectionSource(updateRayTracingGLSL(css.str())) << buildOptions;
1322 	}
1323 
1324 	{
1325 		std::stringstream css;
1326 		css
1327 			<< "#version 460 core\n"
1328 			<< "#extension GL_EXT_ray_tracing : require\n"
1329 			<< ((m_data.useCullMask) ? "#extension GL_EXT_ray_cull_mask : require\n" : "\n")
1330 			<< "layout(location = 0) rayPayloadInEXT ivec4 hitValue;\n";
1331 
1332 		if (!storeInRGen)
1333 			css << imageDeclaration;
1334 
1335 		css
1336 			<< "void main()\n"
1337 			<< "{\n"
1338 			<< "  hitValue = ivec4(1,0,0,1);\n";
1339 		if (!storeInRGen)
1340 		{
1341 			if (m_data.useCullMask)
1342 			{
1343 				css << "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), ivec4(bitfieldReverse(uint(gl_CullMaskEXT)), 0, 0, 1)); \n";
1344 			}
1345 			else
1346 			{
1347 				css << "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), hitValue);\n";
1348 			}
1349 		}
1350 
1351 		css << "}\n";
1352 
1353 		programCollection.glslSources.add("miss") << glu::MissSource(updateRayTracingGLSL(css.str())) << buildOptions;
1354 	}
1355 
1356 	{
1357 		std::stringstream css;
1358 		css <<
1359 			"#version 460 core\n"
1360 			"#extension GL_EXT_ray_tracing : require\n"
1361 			"layout(location = 0) rayPayloadEXT vec4 hitValue;\n"
1362 			"layout(r32f, set = 0, binding = 0) uniform image2D result;\n"
1363 			"layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n"
1364 			"\n"
1365 			"vec3 calculateOrigin(vec3 zeroOrigin, vec3 xAxis, vec3 yAxis)\n"
1366 			"{\n"
1367 			"  return zeroOrigin + (float(gl_LaunchIDEXT.x)/float(gl_LaunchSizeEXT.x-1)) * xAxis + (float(gl_LaunchIDEXT.y)/float(gl_LaunchSizeEXT.y-1)) * yAxis;\n"
1368 			"}\n"
1369 			"\n"
1370 			"void main()\n"
1371 			"{\n"
1372 			"  float tmin      = 0.0;\n"
1373 			"  float tmax      = 2.0;\n"
1374 			"  vec3  origin    = calculateOrigin( vec3(0.1,0.1,1.0), vec3(0.2,0.0,0.0), vec3(0.0,0.2,0.0) );\n"
1375 			"  vec3  direction = vec3(0.0,0.0,-1.0);\n"
1376 			"  hitValue        = vec4(0.0,0.0,0.0,0.0);\n"
1377 			"  traceRayEXT(topLevelAS, 0, 0xFF, 0, 0, 0, origin, tmin, direction, tmax, 0);\n"
1378 			"  imageStore(result, ivec2(gl_LaunchIDEXT.xy), hitValue);\n"
1379 			"}\n";
1380 		programCollection.glslSources.add("rgen_depth") << glu::RaygenSource(updateRayTracingGLSL(css.str())) << buildOptions;
1381 	}
1382 
1383 	{
1384 		std::stringstream css;
1385 		css <<
1386 			"#version 460 core\n"
1387 			"#extension GL_EXT_ray_tracing : require\n"
1388 			"layout(location = 0) rayPayloadInEXT vec4 hitValue;\n"
1389 			"void main()\n"
1390 			"{\n"
1391 			"  hitValue = vec4(gl_RayTmaxEXT,0.0,0.0,1.0);\n"
1392 			"}\n";
1393 
1394 		programCollection.glslSources.add("chit_depth") << glu::ClosestHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
1395 	}
1396 
1397 	{
1398 		std::stringstream css;
1399 		css <<
1400 			"#version 460 core\n"
1401 			"#extension GL_EXT_ray_tracing : require\n"
1402 			"layout(location = 0) rayPayloadInEXT vec4 hitValue;\n"
1403 			"void main()\n"
1404 			"{\n"
1405 			"  hitValue = vec4(0.0,0.0,0.0,1.0);\n"
1406 			"}\n";
1407 
1408 		programCollection.glslSources.add("miss_depth") << glu::MissSource(updateRayTracingGLSL(css.str())) << buildOptions;
1409 	}
1410 }
1411 
createInstance(Context & context) const1412 TestInstance* RayTracingASBasicTestCase::createInstance (Context& context) const
1413 {
1414 	return new RayTracingASBasicTestInstance(context, m_data);
1415 }
1416 
RayTracingASFuncArgTestCase(tcu::TestContext & context,const char * name,const TestParams & data)1417 RayTracingASFuncArgTestCase::RayTracingASFuncArgTestCase (tcu::TestContext& context, const char* name, const TestParams& data)
1418 	: RayTracingASBasicTestCase (context, name, data)
1419 {
1420 }
1421 
initPrograms(SourceCollections & programCollection) const1422 void RayTracingASFuncArgTestCase::initPrograms (SourceCollections& programCollection) const
1423 {
1424 	const vk::ShaderBuildOptions	buildOptions	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
1425 	const vk::SpirVAsmBuildOptions	spvBuildOptions	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, true);
1426 
1427 	{
1428 		// The SPIR-V assembly below is based on the following GLSL code. Some
1429 		// modifications have been made to make traceRaysBottomWrapper take a bare
1430 		// acceleration structure as its argument instead of a pointer to it, so we can
1431 		// test passing a pointer and a bare value in the same test.
1432 		//
1433 		//	#version 460 core
1434 		//	#extension GL_EXT_ray_tracing : require
1435 		//	layout(location = 0) rayPayloadEXT vec4 hitValue;
1436 		//	layout(r32f, set = 0, binding = 0) uniform image2D result;
1437 		//	layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;
1438 		//
1439 		//	void traceRaysBottomWrapper(
1440 		//	  accelerationStructureEXT topLevel,
1441 		//	  uint rayFlags,
1442 		//	  uint cullMask,
1443 		//	  uint sbtRecordOffset,
1444 		//	  uint sbtRecordStride,
1445 		//	  uint missIndex,
1446 		//	  vec3 origin,
1447 		//	  float Tmin,
1448 		//	  vec3 direction,
1449 		//	  float Tmax)
1450 		//	{
1451 		//	  traceRayEXT(topLevel, rayFlags, cullMask, sbtRecordOffset, sbtRecordStride, missIndex, origin, Tmin, direction, Tmax, 0);
1452 		//	}
1453 		//
1454 		//	void traceRaysTopWrapper(
1455 		//	  accelerationStructureEXT topLevel,
1456 		//	  uint rayFlags,
1457 		//	  uint cullMask,
1458 		//	  uint sbtRecordOffset,
1459 		//	  uint sbtRecordStride,
1460 		//	  uint missIndex,
1461 		//	  vec3 origin,
1462 		//	  float Tmin,
1463 		//	  vec3 direction,
1464 		//	  float Tmax)
1465 		//	{
1466 		//	  traceRaysBottomWrapper(topLevel, rayFlags, cullMask, sbtRecordOffset, sbtRecordStride, missIndex, origin, Tmin, direction, Tmax);
1467 		//	}
1468 		//
1469 		//	vec3 calculateOrigin(vec3 zeroOrigin, vec3 xAxis, vec3 yAxis)
1470 		//	{
1471 		//	  return zeroOrigin + (float(gl_LaunchIDEXT.x)/float(gl_LaunchSizeEXT.x-1)) * xAxis + (float(gl_LaunchIDEXT.y)/float(gl_LaunchSizeEXT.y-1)) * yAxis;
1472 		//	}
1473 		//
1474 		//	void main()
1475 		//	{
1476 		//	  float tmin      = 0.0;
1477 		//	  float tmax      = 2.0;
1478 		//	  vec3  origin    = calculateOrigin( vec3(0.1,0.1,1.0), vec3(0.2,0.0,0.0), vec3(0.0,0.2,0.0) );
1479 		//	  vec3  direction = vec3(0.0,0.0,-1.0);
1480 		//	  hitValue        = vec4(0.0,0.0,0.0,0.0);
1481 		//	  traceRaysTopWrapper(topLevelAS, 0, 0xFF, 0, 0, 0, origin, tmin, direction, tmax);
1482 		//	  imageStore(result, ivec2(gl_LaunchIDEXT.xy), hitValue);
1483 		//	}
1484 
1485 		std::ostringstream rgen;
1486 		rgen
1487 			<< "; SPIR-V\n"
1488 			<< "; Version: 1.4\n"
1489 			<< "; Generator: Khronos Glslang Reference Front End; 10\n"
1490 			<< "; Bound: 156\n"
1491 			<< "; Schema: 0\n"
1492 			<< "OpCapability RayTracingKHR\n"
1493 			<< "OpExtension \"SPV_KHR_ray_tracing\"\n"
1494 			<< "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1495 			<< "OpMemoryModel Logical GLSL450\n"
1496 			<< "OpEntryPoint RayGenerationKHR %4 \"main\" %59 %82 %88 %130 %148\n"
1497 			<< "OpDecorate %59 Location 0\n"
1498 			<< "OpDecorate %82 BuiltIn LaunchIdKHR\n"
1499 			<< "OpDecorate %88 BuiltIn LaunchSizeKHR\n"
1500 			<< "OpDecorate %130 DescriptorSet 0\n"
1501 			<< "OpDecorate %130 Binding 1\n"
1502 			<< "OpDecorate %148 DescriptorSet 0\n"
1503 			<< "OpDecorate %148 Binding 0\n"
1504 			<< "%2 = OpTypeVoid\n"
1505 			<< "%3 = OpTypeFunction %2\n"
1506 
1507 			// This is the bare type.
1508 			<< "%6 = OpTypeAccelerationStructureKHR\n"
1509 
1510 			// This is the pointer type.
1511 			<< "%7 = OpTypePointer UniformConstant %6\n"
1512 
1513 			<< "%8 = OpTypeInt 32 0\n"
1514 			<< "%9 = OpTypePointer Function %8\n"
1515 			<< "%10 = OpTypeFloat 32\n"
1516 			<< "%11 = OpTypeVector %10 3\n"
1517 			<< "%12 = OpTypePointer Function %11\n"
1518 			<< "%13 = OpTypePointer Function %10\n"
1519 
1520 			// This is the type for traceRaysTopWrapper and also the original traceRaysBottomWrapper.
1521 			<< "%14 = OpTypeFunction %2 %7 %9 %9 %9 %9 %9 %12 %13 %12 %13\n"
1522 
1523 			// This is the modified type to take a bare AS as the first argument, for the modified version of traceRaysBottomWrapper.
1524 			<< "%14b = OpTypeFunction %2 %6 %9 %9 %9 %9 %9 %12 %13 %12 %13\n"
1525 
1526 			<< "%39 = OpTypeFunction %11 %12 %12 %12\n"
1527 			<< "%55 = OpTypeInt 32 1\n"
1528 			<< "%56 = OpConstant %55 0\n"
1529 			<< "%57 = OpTypeVector %10 4\n"
1530 			<< "%58 = OpTypePointer RayPayloadKHR %57\n"
1531 			<< "%59 = OpVariable %58 RayPayloadKHR\n"
1532 			<< "%80 = OpTypeVector %8 3\n"
1533 			<< "%81 = OpTypePointer Input %80\n"
1534 			<< "%82 = OpVariable %81 Input\n"
1535 			<< "%83 = OpConstant %8 0\n"
1536 			<< "%84 = OpTypePointer Input %8\n"
1537 			<< "%88 = OpVariable %81 Input\n"
1538 			<< "%91 = OpConstant %8 1\n"
1539 			<< "%112 = OpConstant %10 0\n"
1540 			<< "%114 = OpConstant %10 2\n"
1541 			<< "%116 = OpConstant %10 0.100000001\n"
1542 			<< "%117 = OpConstant %10 1\n"
1543 			<< "%118 = OpConstantComposite %11 %116 %116 %117\n"
1544 			<< "%119 = OpConstant %10 0.200000003\n"
1545 			<< "%120 = OpConstantComposite %11 %119 %112 %112\n"
1546 			<< "%121 = OpConstantComposite %11 %112 %119 %112\n"
1547 			<< "%127 = OpConstant %10 -1\n"
1548 			<< "%128 = OpConstantComposite %11 %112 %112 %127\n"
1549 			<< "%129 = OpConstantComposite %57 %112 %112 %112 %112\n"
1550 			<< "%130 = OpVariable %7 UniformConstant\n"
1551 			<< "%131 = OpConstant %8 255\n"
1552 			<< "%146 = OpTypeImage %10 2D 0 0 0 2 R32f\n"
1553 			<< "%147 = OpTypePointer UniformConstant %146\n"
1554 			<< "%148 = OpVariable %147 UniformConstant\n"
1555 			<< "%150 = OpTypeVector %8 2\n"
1556 			<< "%153 = OpTypeVector %55 2\n"
1557 
1558 			// This is main().
1559 			<< "%4 = OpFunction %2 None %3\n"
1560 			<< "%5 = OpLabel\n"
1561 			<< "%111 = OpVariable %13 Function\n"
1562 			<< "%113 = OpVariable %13 Function\n"
1563 			<< "%115 = OpVariable %12 Function\n"
1564 			<< "%122 = OpVariable %12 Function\n"
1565 			<< "%123 = OpVariable %12 Function\n"
1566 			<< "%124 = OpVariable %12 Function\n"
1567 			<< "%126 = OpVariable %12 Function\n"
1568 			<< "%132 = OpVariable %9 Function\n"
1569 			<< "%133 = OpVariable %9 Function\n"
1570 			<< "%134 = OpVariable %9 Function\n"
1571 			<< "%135 = OpVariable %9 Function\n"
1572 			<< "%136 = OpVariable %9 Function\n"
1573 			<< "%137 = OpVariable %12 Function\n"
1574 			<< "%139 = OpVariable %13 Function\n"
1575 			<< "%141 = OpVariable %12 Function\n"
1576 			<< "%143 = OpVariable %13 Function\n"
1577 			<< "OpStore %111 %112\n"
1578 			<< "OpStore %113 %114\n"
1579 			<< "OpStore %122 %118\n"
1580 			<< "OpStore %123 %120\n"
1581 			<< "OpStore %124 %121\n"
1582 			<< "%125 = OpFunctionCall %11 %43 %122 %123 %124\n"
1583 			<< "OpStore %115 %125\n"
1584 			<< "OpStore %126 %128\n"
1585 			<< "OpStore %59 %129\n"
1586 			<< "OpStore %132 %83\n"
1587 			<< "OpStore %133 %131\n"
1588 			<< "OpStore %134 %83\n"
1589 			<< "OpStore %135 %83\n"
1590 			<< "OpStore %136 %83\n"
1591 			<< "%138 = OpLoad %11 %115\n"
1592 			<< "OpStore %137 %138\n"
1593 			<< "%140 = OpLoad %10 %111\n"
1594 			<< "OpStore %139 %140\n"
1595 			<< "%142 = OpLoad %11 %126\n"
1596 			<< "OpStore %141 %142\n"
1597 			<< "%144 = OpLoad %10 %113\n"
1598 			<< "OpStore %143 %144\n"
1599 			<< "%145 = OpFunctionCall %2 %37 %130 %132 %133 %134 %135 %136 %137 %139 %141 %143\n"
1600 			<< "%149 = OpLoad %146 %148\n"
1601 			<< "%151 = OpLoad %80 %82\n"
1602 			<< "%152 = OpVectorShuffle %150 %151 %151 0 1\n"
1603 			<< "%154 = OpBitcast %153 %152\n"
1604 			<< "%155 = OpLoad %57 %59\n"
1605 			<< "OpImageWrite %149 %154 %155\n"
1606 			<< "OpReturn\n"
1607 			<< "OpFunctionEnd\n"
1608 
1609 			// This is traceRaysBottomWrapper, doing the OpTraceRayKHR call.
1610 			// We have modified the type so it takes a bare AS as the first argument.
1611 			// %25 = OpFunction %2 None %14
1612 			<< "%25 = OpFunction %2 None %14b\n"
1613 
1614 			// Also the type of the first argument here.
1615 			// %15 = OpFunctionParameter %7
1616 			<< "%15 = OpFunctionParameter %6\n"
1617 
1618 			<< "%16 = OpFunctionParameter %9\n"
1619 			<< "%17 = OpFunctionParameter %9\n"
1620 			<< "%18 = OpFunctionParameter %9\n"
1621 			<< "%19 = OpFunctionParameter %9\n"
1622 			<< "%20 = OpFunctionParameter %9\n"
1623 			<< "%21 = OpFunctionParameter %12\n"
1624 			<< "%22 = OpFunctionParameter %13\n"
1625 			<< "%23 = OpFunctionParameter %12\n"
1626 			<< "%24 = OpFunctionParameter %13\n"
1627 			<< "%26 = OpLabel\n"
1628 
1629 			// We no longer need to dereference the pointer here.
1630 			// %45 = OpLoad %6 %15
1631 
1632 			<< "%46 = OpLoad %8 %16\n"
1633 			<< "%47 = OpLoad %8 %17\n"
1634 			<< "%48 = OpLoad %8 %18\n"
1635 			<< "%49 = OpLoad %8 %19\n"
1636 			<< "%50 = OpLoad %8 %20\n"
1637 			<< "%51 = OpLoad %11 %21\n"
1638 			<< "%52 = OpLoad %10 %22\n"
1639 			<< "%53 = OpLoad %11 %23\n"
1640 			<< "%54 = OpLoad %10 %24\n"
1641 
1642 			// And we can use the first argument here directly.
1643 			// OpTraceRayKHR %45 %46 %47 %48 %49 %50 %51 %52 %53 %54 %59
1644 			<< "OpTraceRayKHR %15 %46 %47 %48 %49 %50 %51 %52 %53 %54 %59\n"
1645 
1646 			<< "OpReturn\n"
1647 			<< "OpFunctionEnd\n"
1648 
1649 			// This is traceRaysTopWrapper, which calls traceRaysBottomWrapper.
1650 			<< "%37 = OpFunction %2 None %14\n"
1651 
1652 			// First argument, pointer to AS.
1653 			<< "%27 = OpFunctionParameter %7\n"
1654 
1655 			<< "%28 = OpFunctionParameter %9\n"
1656 			<< "%29 = OpFunctionParameter %9\n"
1657 			<< "%30 = OpFunctionParameter %9\n"
1658 			<< "%31 = OpFunctionParameter %9\n"
1659 			<< "%32 = OpFunctionParameter %9\n"
1660 			<< "%33 = OpFunctionParameter %12\n"
1661 			<< "%34 = OpFunctionParameter %13\n"
1662 			<< "%35 = OpFunctionParameter %12\n"
1663 			<< "%36 = OpFunctionParameter %13\n"
1664 			<< "%38 = OpLabel\n"
1665 			<< "%60 = OpVariable %9 Function\n"
1666 			<< "%62 = OpVariable %9 Function\n"
1667 			<< "%64 = OpVariable %9 Function\n"
1668 			<< "%66 = OpVariable %9 Function\n"
1669 			<< "%68 = OpVariable %9 Function\n"
1670 			<< "%70 = OpVariable %12 Function\n"
1671 			<< "%72 = OpVariable %13 Function\n"
1672 			<< "%74 = OpVariable %12 Function\n"
1673 			<< "%76 = OpVariable %13 Function\n"
1674 
1675 			// Dereference the pointer to pass the AS as the first argument.
1676 			<< "%27b = OpLoad %6 %27\n"
1677 
1678 			<< "%61 = OpLoad %8 %28\n"
1679 			<< "OpStore %60 %61\n"
1680 			<< "%63 = OpLoad %8 %29\n"
1681 			<< "OpStore %62 %63\n"
1682 			<< "%65 = OpLoad %8 %30\n"
1683 			<< "OpStore %64 %65\n"
1684 			<< "%67 = OpLoad %8 %31\n"
1685 			<< "OpStore %66 %67\n"
1686 			<< "%69 = OpLoad %8 %32\n"
1687 			<< "OpStore %68 %69\n"
1688 			<< "%71 = OpLoad %11 %33\n"
1689 			<< "OpStore %70 %71\n"
1690 			<< "%73 = OpLoad %10 %34\n"
1691 			<< "OpStore %72 %73\n"
1692 			<< "%75 = OpLoad %11 %35\n"
1693 			<< "OpStore %74 %75\n"
1694 			<< "%77 = OpLoad %10 %36\n"
1695 			<< "OpStore %76 %77\n"
1696 
1697 			// %2 is void, %25 is traceRaysBottomWrapper and %27 was the first argument.
1698 			// We need to pass the loaded AS instead.
1699 			// %78 = OpFunctionCall %2 %25 %27 %60 %62 %64 %66 %68 %70 %72 %74 %76
1700 			<< "%78 = OpFunctionCall %2 %25 %27b %60 %62 %64 %66 %68 %70 %72 %74 %76\n"
1701 
1702 			<< "OpReturn\n"
1703 			<< "OpFunctionEnd\n"
1704 
1705 			// This is calculateOrigin().
1706 			<< "%43 = OpFunction %11 None %39\n"
1707 			<< "%40 = OpFunctionParameter %12\n"
1708 			<< "%41 = OpFunctionParameter %12\n"
1709 			<< "%42 = OpFunctionParameter %12\n"
1710 			<< "%44 = OpLabel\n"
1711 			<< "%79 = OpLoad %11 %40\n"
1712 			<< "%85 = OpAccessChain %84 %82 %83\n"
1713 			<< "%86 = OpLoad %8 %85\n"
1714 			<< "%87 = OpConvertUToF %10 %86\n"
1715 			<< "%89 = OpAccessChain %84 %88 %83\n"
1716 			<< "%90 = OpLoad %8 %89\n"
1717 			<< "%92 = OpISub %8 %90 %91\n"
1718 			<< "%93 = OpConvertUToF %10 %92\n"
1719 			<< "%94 = OpFDiv %10 %87 %93\n"
1720 			<< "%95 = OpLoad %11 %41\n"
1721 			<< "%96 = OpVectorTimesScalar %11 %95 %94\n"
1722 			<< "%97 = OpFAdd %11 %79 %96\n"
1723 			<< "%98 = OpAccessChain %84 %82 %91\n"
1724 			<< "%99 = OpLoad %8 %98\n"
1725 			<< "%100 = OpConvertUToF %10 %99\n"
1726 			<< "%101 = OpAccessChain %84 %88 %91\n"
1727 			<< "%102 = OpLoad %8 %101\n"
1728 			<< "%103 = OpISub %8 %102 %91\n"
1729 			<< "%104 = OpConvertUToF %10 %103\n"
1730 			<< "%105 = OpFDiv %10 %100 %104\n"
1731 			<< "%106 = OpLoad %11 %42\n"
1732 			<< "%107 = OpVectorTimesScalar %11 %106 %105\n"
1733 			<< "%108 = OpFAdd %11 %97 %107\n"
1734 			<< "OpReturnValue %108\n"
1735 			<< "OpFunctionEnd\n"
1736 			;
1737 
1738 		programCollection.spirvAsmSources.add("rgen_depth") << spvBuildOptions << rgen.str();
1739 	}
1740 
1741 	// chit_depth and miss_depth below have been left untouched.
1742 
1743 	{
1744 		std::stringstream css;
1745 		css <<
1746 			"#version 460 core\n"
1747 			"#extension GL_EXT_ray_tracing : require\n"
1748 			"layout(location = 0) rayPayloadInEXT vec4 hitValue;\n"
1749 			"void main()\n"
1750 			"{\n"
1751 			"  hitValue = vec4(gl_RayTmaxEXT,0.0,0.0,1.0);\n"
1752 			"}\n";
1753 
1754 		programCollection.glslSources.add("chit_depth") << glu::ClosestHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
1755 	}
1756 
1757 	{
1758 		std::stringstream css;
1759 		css <<
1760 			"#version 460 core\n"
1761 			"#extension GL_EXT_ray_tracing : require\n"
1762 			"layout(location = 0) rayPayloadInEXT vec4 hitValue;\n"
1763 			"void main()\n"
1764 			"{\n"
1765 			"  hitValue = vec4(0.0,0.0,0.0,1.0);\n"
1766 			"}\n";
1767 
1768 		programCollection.glslSources.add("miss_depth") << glu::MissSource(updateRayTracingGLSL(css.str())) << buildOptions;
1769 	}
1770 }
1771 
RayTracingASBasicTestInstance(Context & context,const TestParams & data)1772 RayTracingASBasicTestInstance::RayTracingASBasicTestInstance (Context& context, const TestParams& data)
1773 	: vkt::TestInstance		(context)
1774 	, m_data				(data)
1775 {
1776 }
1777 
runTest(const deUint32 workerThreadsCount)1778 de::MovePtr<BufferWithMemory> RayTracingASBasicTestInstance::runTest(const deUint32 workerThreadsCount)
1779 {
1780 	const InstanceInterface&			vki									= m_context.getInstanceInterface();
1781 	const DeviceInterface&				vkd									= m_context.getDeviceInterface();
1782 	const VkDevice						device								= m_context.getDevice();
1783 	const VkPhysicalDevice				physicalDevice						= m_context.getPhysicalDevice();
1784 	const deUint32						queueFamilyIndex					= m_context.getUniversalQueueFamilyIndex();
1785 	const VkQueue						queue								= m_context.getUniversalQueue();
1786 	Allocator&							allocator							= m_context.getDefaultAllocator();
1787 	const deUint32						pixelCount							= m_data.width * m_data.height;
1788 	const deUint32						shaderGroupHandleSize				= getShaderGroupSize(vki, physicalDevice);
1789 	const deUint32						shaderGroupBaseAlignment			= getShaderGroupBaseAlignment(vki, physicalDevice);
1790 	const bool							htCopy								= (workerThreadsCount != 0) && (m_data.operationType == OP_COPY);
1791 	const bool							htSerialize							= (workerThreadsCount != 0) && (m_data.operationType == OP_SERIALIZE);
1792 
1793 	const Move<VkDescriptorSetLayout>	descriptorSetLayout					= DescriptorSetLayoutBuilder()
1794 																					.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
1795 																					.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
1796 																					.build(vkd, device);
1797 	const Move<VkDescriptorPool>		descriptorPool						= DescriptorPoolBuilder()
1798 																					.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
1799 																					.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
1800 																					.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1801 	const Move<VkDescriptorSet>			descriptorSet						= makeDescriptorSet(vkd, device, *descriptorPool, *descriptorSetLayout);
1802 	const Move<VkPipelineLayout>		pipelineLayout						= makePipelineLayout(vkd, device, descriptorSetLayout.get());
1803 
1804 	de::MovePtr<RayTracingPipeline>		rayTracingPipeline					= de::newMovePtr<RayTracingPipeline>();
1805 	m_data.testConfiguration->initRayTracingShaders(rayTracingPipeline, m_context, m_data);
1806 	Move<VkPipeline>					pipeline							= rayTracingPipeline->createPipeline(vkd, device, *pipelineLayout);
1807 
1808 	de::MovePtr<BufferWithMemory>		raygenShaderBindingTable;
1809 	de::MovePtr<BufferWithMemory>		hitShaderBindingTable;
1810 	de::MovePtr<BufferWithMemory>		missShaderBindingTable;
1811 	m_data.testConfiguration->initShaderBindingTables(rayTracingPipeline, m_context, m_data, *pipeline, shaderGroupHandleSize, shaderGroupBaseAlignment, raygenShaderBindingTable, hitShaderBindingTable, missShaderBindingTable);
1812 
1813 	const VkStridedDeviceAddressRegionKHR	raygenShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, raygenShaderBindingTable->get(),	0),	shaderGroupHandleSize,	shaderGroupHandleSize);
1814 	const VkStridedDeviceAddressRegionKHR	missShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(),		0),	shaderGroupHandleSize,	shaderGroupHandleSize);
1815 	const VkStridedDeviceAddressRegionKHR	hitShaderBindingTableRegion			= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitShaderBindingTable->get(),		0),	shaderGroupHandleSize,	shaderGroupHandleSize);
1816 	const VkStridedDeviceAddressRegionKHR	callableShaderBindingTableRegion	= makeStridedDeviceAddressRegionKHR(DE_NULL,																	0,						0);
1817 
1818 	const VkFormat						imageFormat							= m_data.testConfiguration->getResultImageFormat();
1819 	const VkImageCreateInfo				imageCreateInfo						= makeImageCreateInfo(m_data.width, m_data.height, imageFormat);
1820 	const VkImageSubresourceRange		imageSubresourceRange				= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
1821 	const de::MovePtr<ImageWithMemory>	image								= de::MovePtr<ImageWithMemory>(new ImageWithMemory(vkd, device, allocator, imageCreateInfo, MemoryRequirement::Any));
1822 	const Move<VkImageView>				imageView							= makeImageView(vkd, device, **image, VK_IMAGE_VIEW_TYPE_2D, imageFormat, imageSubresourceRange);
1823 
1824 	const VkBufferCreateInfo			resultBufferCreateInfo				= makeBufferCreateInfo(pixelCount*m_data.testConfiguration->getResultImageFormatSize(), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
1825 	const VkImageSubresourceLayers		resultBufferImageSubresourceLayers	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
1826 	const VkBufferImageCopy				resultBufferImageRegion				= makeBufferImageCopy(makeExtent3D(m_data.width, m_data.height, 1u), resultBufferImageSubresourceLayers);
1827 	de::MovePtr<BufferWithMemory>		resultBuffer						= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible));
1828 
1829 	const VkDescriptorImageInfo			descriptorImageInfo					= makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
1830 
1831 	const Move<VkCommandPool>			cmdPool								= createCommandPool(vkd, device, 0, queueFamilyIndex);
1832 	const Move<VkCommandBuffer>			cmdBuffer							= allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1833 
1834 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	bottomLevelAccelerationStructures;
1835 	de::MovePtr<TopLevelAccelerationStructure>						topLevelAccelerationStructure;
1836 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	bottomLevelAccelerationStructureCopies;
1837 	de::MovePtr<TopLevelAccelerationStructure>						topLevelAccelerationStructureCopy;
1838 	std::vector<de::SharedPtr<SerialStorage>>						bottomSerialized;
1839 	std::vector<de::SharedPtr<SerialStorage>>						topSerialized;
1840 	std::vector<VkDeviceSize>			accelerationCompactedSizes;
1841 	std::vector<VkDeviceSize>			accelerationSerialSizes;
1842 	Move<VkQueryPool>					m_queryPoolCompact;
1843 	Move<VkQueryPool>					m_queryPoolSerial;
1844 
1845 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
1846 	{
1847 		const VkImageMemoryBarrier				preImageBarrier = makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT,
1848 			VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1849 			**image, imageSubresourceRange);
1850 		cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &preImageBarrier);
1851 		const VkClearValue						clearValue = m_data.testConfiguration->getClearValue();
1852 		vkd.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1, &imageSubresourceRange);
1853 		const VkImageMemoryBarrier				postImageBarrier = makeImageMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
1854 			VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL,
1855 			**image, imageSubresourceRange);
1856 		cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, &postImageBarrier);
1857 
1858 		// build bottom level acceleration structures and their copies ( only when we are testing copying bottom level acceleration structures )
1859 		bool									bottomCompact		= m_data.operationType == OP_COMPACT && m_data.operationTarget == OT_BOTTOM_ACCELERATION;
1860 		bool									bottomSerial		= m_data.operationType == OP_SERIALIZE && m_data.operationTarget == OT_BOTTOM_ACCELERATION;
1861 		const bool								buildWithoutGeom	= (m_data.emptyASCase == EmptyAccelerationStructureCase::NO_GEOMETRIES_BOTTOM);
1862 		const bool								bottomNoPrimitives	= (m_data.emptyASCase == EmptyAccelerationStructureCase::NO_PRIMITIVES_BOTTOM);
1863 		const bool								topNoPrimitives		= (m_data.emptyASCase == EmptyAccelerationStructureCase::NO_PRIMITIVES_TOP);
1864 		const bool								inactiveInstances	= (m_data.emptyASCase == EmptyAccelerationStructureCase::INACTIVE_INSTANCES);
1865 		bottomLevelAccelerationStructures							= m_data.testConfiguration->initBottomAccelerationStructures(m_context, m_data);
1866 		VkBuildAccelerationStructureFlagsKHR	allowCompactionFlag	= VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR;
1867 		VkBuildAccelerationStructureFlagsKHR	emptyCompactionFlag	= VkBuildAccelerationStructureFlagsKHR(0);
1868 		VkBuildAccelerationStructureFlagsKHR	bottomCompactFlags	= (bottomCompact ? allowCompactionFlag : emptyCompactionFlag);
1869 		VkBuildAccelerationStructureFlagsKHR	bottomBuildFlags	= m_data.buildFlags | bottomCompactFlags;
1870 		std::vector<VkAccelerationStructureKHR>	accelerationStructureHandles;
1871 		std::vector<VkDeviceSize>				bottomBlasCompactSize;
1872 		std::vector<VkDeviceSize>				bottomBlasSerialSize;
1873 
1874 		for (auto& blas : bottomLevelAccelerationStructures)
1875 		{
1876 			blas->setBuildType						(m_data.buildType);
1877 			blas->setBuildFlags						(bottomBuildFlags);
1878 			blas->setUseArrayOfPointers				(m_data.bottomUsesAOP);
1879 			blas->setCreateGeneric					(m_data.bottomGeneric);
1880 			blas->setCreationBufferUnbounded		(m_data.bottomUnboundedCreation);
1881 			blas->setBuildWithoutGeometries			(buildWithoutGeom);
1882 			blas->setBuildWithoutPrimitives			(bottomNoPrimitives);
1883 			blas->createAndBuild					(vkd, device, *cmdBuffer, allocator);
1884 			accelerationStructureHandles.push_back	(*(blas->getPtr()));
1885 		}
1886 
1887 		if (m_data.operationType == OP_COMPACT)
1888 		{
1889 			deUint32 queryCount	= (m_data.operationTarget == OT_BOTTOM_ACCELERATION) ? deUint32(bottomLevelAccelerationStructures.size()) : 1u;
1890 			if (m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1891 				m_queryPoolCompact = makeQueryPool(vkd, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, queryCount);
1892 			if (m_data.operationTarget == OT_BOTTOM_ACCELERATION)
1893 				queryAccelerationStructureSize(vkd, device, *cmdBuffer, accelerationStructureHandles, m_data.buildType, m_queryPoolCompact.get(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, 0u, bottomBlasCompactSize);
1894 		}
1895 		if (m_data.operationType == OP_SERIALIZE)
1896 		{
1897 			deUint32 queryCount	= (m_data.operationTarget == OT_BOTTOM_ACCELERATION) ? deUint32(bottomLevelAccelerationStructures.size()) : 1u;
1898 			if (m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1899 				m_queryPoolSerial = makeQueryPool(vkd, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, queryCount);
1900 			if (m_data.operationTarget == OT_BOTTOM_ACCELERATION)
1901 				queryAccelerationStructureSize(vkd, device, *cmdBuffer, accelerationStructureHandles, m_data.buildType, m_queryPoolSerial.get(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, bottomBlasSerialSize);
1902 		}
1903 
1904 		// if AS is built on GPU and we are planning to make a compact copy of it or serialize / deserialize it - we have to have download query results to CPU
1905 		if ((m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) && (bottomCompact || bottomSerial))
1906 		{
1907 			endCommandBuffer(vkd, *cmdBuffer);
1908 
1909 			submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
1910 
1911 			if (bottomCompact)
1912 				VK_CHECK(vkd.getQueryPoolResults(device, *m_queryPoolCompact, 0u, deUint32(bottomBlasCompactSize.size()), sizeof(VkDeviceSize) * bottomBlasCompactSize.size(), bottomBlasCompactSize.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
1913 			if (bottomSerial)
1914 				VK_CHECK(vkd.getQueryPoolResults(device, *m_queryPoolSerial, 0u, deUint32(bottomBlasSerialSize.size()), sizeof(VkDeviceSize) * bottomBlasSerialSize.size(), bottomBlasSerialSize.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
1915 
1916 			vkd.resetCommandPool(device, *cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
1917 			beginCommandBuffer(vkd, *cmdBuffer, 0u);
1918 		}
1919 
1920 		auto bottomLevelAccelerationStructuresPtr								= &bottomLevelAccelerationStructures;
1921 		if (m_data.operationType != OP_NONE && m_data.operationTarget == OT_BOTTOM_ACCELERATION)
1922 		{
1923 			switch (m_data.operationType)
1924 			{
1925 			case OP_COPY:
1926 			{
1927 				for (size_t i = 0; i < bottomLevelAccelerationStructures.size(); ++i)
1928 				{
1929 					de::MovePtr<BottomLevelAccelerationStructure> asCopy = makeBottomLevelAccelerationStructure();
1930 					asCopy->setDeferredOperation(htCopy, workerThreadsCount);
1931 					asCopy->setBuildType(m_data.buildType);
1932 					asCopy->setBuildFlags(m_data.buildFlags);
1933 					asCopy->setUseArrayOfPointers(m_data.bottomUsesAOP);
1934 					asCopy->setCreateGeneric(m_data.bottomGeneric);
1935 					asCopy->setCreationBufferUnbounded(m_data.bottomUnboundedCreation);
1936 					asCopy->setBuildWithoutGeometries(buildWithoutGeom);
1937 					asCopy->setBuildWithoutPrimitives(bottomNoPrimitives);
1938 					asCopy->createAndCopyFrom(vkd, device, *cmdBuffer, allocator, bottomLevelAccelerationStructures[i].get(), 0u, 0u);
1939 					bottomLevelAccelerationStructureCopies.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(asCopy.release()));
1940 				}
1941 				break;
1942 			}
1943 			case OP_COMPACT:
1944 			{
1945 				for (size_t i = 0; i < bottomLevelAccelerationStructures.size(); ++i)
1946 				{
1947 					de::MovePtr<BottomLevelAccelerationStructure> asCopy = makeBottomLevelAccelerationStructure();
1948 					asCopy->setBuildType(m_data.buildType);
1949 					asCopy->setBuildFlags(m_data.buildFlags);
1950 					asCopy->setUseArrayOfPointers(m_data.bottomUsesAOP);
1951 					asCopy->setCreateGeneric(m_data.bottomGeneric);
1952 					asCopy->setCreationBufferUnbounded(m_data.bottomUnboundedCreation);
1953 					asCopy->setBuildWithoutGeometries(buildWithoutGeom);
1954 					asCopy->setBuildWithoutPrimitives(bottomNoPrimitives);
1955 					asCopy->createAndCopyFrom(vkd, device, *cmdBuffer, allocator, bottomLevelAccelerationStructures[i].get(), bottomBlasCompactSize[i], 0u);
1956 					bottomLevelAccelerationStructureCopies.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(asCopy.release()));
1957 				}
1958 				break;
1959 			}
1960 			case OP_SERIALIZE:
1961 			{
1962 				//bottomLevelAccelerationStructureCopies = m_data.testConfiguration->initBottomAccelerationStructures(m_context, m_data);
1963 				for (size_t i = 0; i < bottomLevelAccelerationStructures.size(); ++i)
1964 				{
1965 					de::SharedPtr<SerialStorage> storage ( new SerialStorage(vkd, device, allocator, m_data.buildType, bottomBlasSerialSize[i]));
1966 
1967 					bottomLevelAccelerationStructures[i]->setDeferredOperation(htSerialize, workerThreadsCount);
1968 					bottomLevelAccelerationStructures[i]->serialize(vkd, device, *cmdBuffer, storage.get());
1969 					bottomSerialized.push_back(storage);
1970 
1971 					if (m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1972 					{
1973 						endCommandBuffer(vkd, *cmdBuffer);
1974 
1975 						submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
1976 
1977 						vkd.resetCommandPool(device, *cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
1978 						beginCommandBuffer(vkd, *cmdBuffer, 0u);
1979 					}
1980 
1981 					de::MovePtr<BottomLevelAccelerationStructure> asCopy = makeBottomLevelAccelerationStructure();
1982 					asCopy->setBuildType(m_data.buildType);
1983 					asCopy->setBuildFlags(m_data.buildFlags);
1984 					asCopy->setUseArrayOfPointers(m_data.bottomUsesAOP);
1985 					asCopy->setCreateGeneric(m_data.bottomGeneric);
1986 					asCopy->setCreationBufferUnbounded(m_data.bottomUnboundedCreation);
1987 					asCopy->setBuildWithoutGeometries(buildWithoutGeom);
1988 					asCopy->setBuildWithoutPrimitives(bottomNoPrimitives);
1989 					asCopy->setDeferredOperation(htSerialize, workerThreadsCount);
1990 					asCopy->createAndDeserializeFrom(vkd, device, *cmdBuffer, allocator, storage.get(), 0u);
1991 					bottomLevelAccelerationStructureCopies.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(asCopy.release()));
1992 				}
1993 				break;
1994 			}
1995 			default:
1996 				DE_ASSERT(DE_FALSE);
1997 			}
1998 			bottomLevelAccelerationStructuresPtr = &bottomLevelAccelerationStructureCopies;
1999 		}
2000 
2001 		// build top level acceleration structures and their copies ( only when we are testing copying top level acceleration structures )
2002 		bool									topCompact			= m_data.operationType == OP_COMPACT && m_data.operationTarget == OT_TOP_ACCELERATION;
2003 		bool									topSerial			= m_data.operationType == OP_SERIALIZE && m_data.operationTarget == OT_TOP_ACCELERATION;
2004 		VkBuildAccelerationStructureFlagsKHR	topCompactFlags		= (topCompact ? allowCompactionFlag : emptyCompactionFlag);
2005 		VkBuildAccelerationStructureFlagsKHR	topBuildFlags		= m_data.buildFlags | topCompactFlags;
2006 		std::vector<VkAccelerationStructureKHR> topLevelStructureHandles;
2007 		std::vector<VkDeviceSize>				topBlasCompactSize;
2008 		std::vector<VkDeviceSize>				topBlasSerialSize;
2009 
2010 		topLevelAccelerationStructure								= m_data.testConfiguration->initTopAccelerationStructure(m_context, m_data, *bottomLevelAccelerationStructuresPtr);
2011 		topLevelAccelerationStructure->setBuildType					(m_data.buildType);
2012 		topLevelAccelerationStructure->setBuildFlags				(topBuildFlags);
2013 		topLevelAccelerationStructure->setBuildWithoutPrimitives	(topNoPrimitives);
2014 		topLevelAccelerationStructure->setUseArrayOfPointers		(m_data.topUsesAOP);
2015 		topLevelAccelerationStructure->setCreateGeneric				(m_data.topGeneric);
2016 		topLevelAccelerationStructure->setCreationBufferUnbounded	(m_data.topUnboundedCreation);
2017 		topLevelAccelerationStructure->setInactiveInstances			(inactiveInstances);
2018 		topLevelAccelerationStructure->createAndBuild				(vkd, device, *cmdBuffer, allocator);
2019 		topLevelStructureHandles.push_back							(*(topLevelAccelerationStructure->getPtr()));
2020 
2021 		if (topCompact)
2022 			queryAccelerationStructureSize(vkd, device, *cmdBuffer, topLevelStructureHandles, m_data.buildType, m_queryPoolCompact.get(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, 0u, topBlasCompactSize);
2023 		if (topSerial)
2024 			queryAccelerationStructureSize(vkd, device, *cmdBuffer, topLevelStructureHandles, m_data.buildType, m_queryPoolSerial.get(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, topBlasSerialSize);
2025 
		// if AS is built on GPU and we are planning to make a compact copy of it or serialize / deserialize it - we have to download query results to CPU
2027 		if ((m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) && (topCompact || topSerial))
2028 		{
2029 			endCommandBuffer(vkd, *cmdBuffer);
2030 
2031 			submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
2032 
2033 			if (topCompact)
2034 				VK_CHECK(vkd.getQueryPoolResults(device, *m_queryPoolCompact, 0u, deUint32(topBlasCompactSize.size()), sizeof(VkDeviceSize) * topBlasCompactSize.size(), topBlasCompactSize.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
2035 			if (topSerial)
2036 				VK_CHECK(vkd.getQueryPoolResults(device, *m_queryPoolSerial, 0u, deUint32(topBlasSerialSize.size()), sizeof(VkDeviceSize) * topBlasSerialSize.size(), topBlasSerialSize.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
2037 
2038 			vkd.resetCommandPool(device, *cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
2039 			beginCommandBuffer(vkd, *cmdBuffer, 0u);
2040 		}
2041 
2042 		const TopLevelAccelerationStructure*			topLevelRayTracedPtr	= topLevelAccelerationStructure.get();
2043 		if (m_data.operationType != OP_NONE && m_data.operationTarget == OT_TOP_ACCELERATION)
2044 		{
2045 			switch (m_data.operationType)
2046 			{
2047 				case OP_COPY:
2048 				{
2049 					topLevelAccelerationStructureCopy = makeTopLevelAccelerationStructure();
2050 					topLevelAccelerationStructureCopy->setDeferredOperation(htCopy, workerThreadsCount);
2051 					topLevelAccelerationStructureCopy->setBuildType(m_data.buildType);
2052 					topLevelAccelerationStructureCopy->setBuildFlags(m_data.buildFlags);
2053 					topLevelAccelerationStructureCopy->setBuildWithoutPrimitives(topNoPrimitives);
2054 					topLevelAccelerationStructureCopy->setInactiveInstances(inactiveInstances);
2055 					topLevelAccelerationStructureCopy->setUseArrayOfPointers(m_data.topUsesAOP);
2056 					topLevelAccelerationStructureCopy->setCreateGeneric(m_data.topGeneric);
2057 					topLevelAccelerationStructureCopy->setCreationBufferUnbounded(m_data.topUnboundedCreation);
2058 					topLevelAccelerationStructureCopy->createAndCopyFrom(vkd, device, *cmdBuffer, allocator, topLevelAccelerationStructure.get(), 0u, 0u);
2059 					break;
2060 				}
2061 				case OP_COMPACT:
2062 				{
2063 					topLevelAccelerationStructureCopy = makeTopLevelAccelerationStructure();
2064 					topLevelAccelerationStructureCopy->setBuildType(m_data.buildType);
2065 					topLevelAccelerationStructureCopy->setBuildFlags(m_data.buildFlags);
2066 					topLevelAccelerationStructureCopy->setBuildWithoutPrimitives(topNoPrimitives);
2067 					topLevelAccelerationStructureCopy->setInactiveInstances(inactiveInstances);
2068 					topLevelAccelerationStructureCopy->setUseArrayOfPointers(m_data.topUsesAOP);
2069 					topLevelAccelerationStructureCopy->setCreateGeneric(m_data.topGeneric);
2070 					topLevelAccelerationStructureCopy->setCreationBufferUnbounded(m_data.topUnboundedCreation);
2071 					topLevelAccelerationStructureCopy->createAndCopyFrom(vkd, device, *cmdBuffer, allocator, topLevelAccelerationStructure.get(), topBlasCompactSize[0], 0u);
2072 					break;
2073 				}
2074 				case OP_SERIALIZE:
2075 				{
2076 					de::SharedPtr<SerialStorage> storage = de::SharedPtr<SerialStorage>(new SerialStorage(vkd, device, allocator, m_data.buildType, topBlasSerialSize[0]));
2077 
2078 					topLevelAccelerationStructure->setDeferredOperation(htSerialize, workerThreadsCount);
2079 					topLevelAccelerationStructure->serialize(vkd, device, *cmdBuffer, storage.get());
2080 					topSerialized.push_back(storage);
2081 
2082 					if (m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2083 					{
2084 						endCommandBuffer(vkd, *cmdBuffer);
2085 
2086 						submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
2087 
2088 						vkd.resetCommandPool(device, *cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
2089 						beginCommandBuffer(vkd, *cmdBuffer, 0u);
2090 					}
2091 
2092 					topLevelAccelerationStructureCopy = makeTopLevelAccelerationStructure();
2093 					topLevelAccelerationStructureCopy->setBuildType(m_data.buildType);
2094 					topLevelAccelerationStructureCopy->setBuildFlags(m_data.buildFlags);
2095 					topLevelAccelerationStructureCopy->setBuildWithoutPrimitives(topNoPrimitives);
2096 					topLevelAccelerationStructureCopy->setInactiveInstances(inactiveInstances);
2097 					topLevelAccelerationStructureCopy->setUseArrayOfPointers(m_data.topUsesAOP);
2098 					topLevelAccelerationStructureCopy->setCreateGeneric(m_data.topGeneric);
2099 					topLevelAccelerationStructureCopy->setCreationBufferUnbounded(m_data.topUnboundedCreation);
2100 					topLevelAccelerationStructureCopy->setDeferredOperation(htSerialize, workerThreadsCount);
2101 					topLevelAccelerationStructureCopy->createAndDeserializeFrom(vkd, device, *cmdBuffer, allocator, storage.get(), 0u);
2102 					break;
2103 				}
2104 				case OP_UPDATE:
2105 				{
2106 					topLevelAccelerationStructureCopy = m_data.testConfiguration->initTopAccelerationStructure(m_context, m_data, *bottomLevelAccelerationStructuresPtr);
2107 					topLevelAccelerationStructureCopy->setBuildFlags(m_data.buildFlags);
2108 					topLevelAccelerationStructureCopy->create(vkd, device, allocator, 0u, 0u);
2109 					// Update AS based on topLevelAccelerationStructure
2110 					topLevelAccelerationStructureCopy->build(vkd, device, *cmdBuffer, topLevelAccelerationStructure.get());
2111 					break;
2112 				}
2113 				case OP_UPDATE_IN_PLACE:
2114 				{
2115 					// Update in place
2116 					topLevelAccelerationStructure->build(vkd, device, *cmdBuffer, topLevelAccelerationStructure.get());
					// Make a copy
2118 					topLevelAccelerationStructureCopy = makeTopLevelAccelerationStructure();
2119 					topLevelAccelerationStructureCopy->setDeferredOperation(htCopy, workerThreadsCount);
2120 					topLevelAccelerationStructureCopy->setBuildType(m_data.buildType);
2121 					topLevelAccelerationStructureCopy->setBuildFlags(m_data.buildFlags);
2122 					topLevelAccelerationStructureCopy->setBuildWithoutPrimitives(topNoPrimitives);
2123 					topLevelAccelerationStructureCopy->setInactiveInstances(inactiveInstances);
2124 					topLevelAccelerationStructureCopy->setUseArrayOfPointers(m_data.topUsesAOP);
2125 					topLevelAccelerationStructureCopy->setCreateGeneric(m_data.topGeneric);
2126 					topLevelAccelerationStructureCopy->createAndCopyFrom(vkd, device, *cmdBuffer, allocator, topLevelAccelerationStructure.get(), 0u, 0u);
2127 					break;
2128 				}
2129 				default:
2130 					DE_ASSERT(DE_FALSE);
2131 			}
2132 			topLevelRayTracedPtr = topLevelAccelerationStructureCopy.get();
2133 		}
2134 
2135 		const VkMemoryBarrier preTraceMemoryBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
2136 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, &preTraceMemoryBarrier);
2137 
2138 		VkWriteDescriptorSetAccelerationStructureKHR	accelerationStructureWriteDescriptorSet	=
2139 		{
2140 			VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	//  VkStructureType						sType;
2141 			DE_NULL,															//  const void*							pNext;
2142 			1u,																	//  deUint32							accelerationStructureCount;
2143 			topLevelRayTracedPtr->getPtr(),										//  const VkAccelerationStructureKHR*	pAccelerationStructures;
2144 		};
2145 
2146 		DescriptorSetUpdateBuilder()
2147 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
2148 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
2149 			.update(vkd, device);
2150 
2151 		vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL);
2152 
2153 		vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
2154 
2155 		cmdTraceRays(vkd,
2156 			*cmdBuffer,
2157 			&raygenShaderBindingTableRegion,
2158 			&missShaderBindingTableRegion,
2159 			&hitShaderBindingTableRegion,
2160 			&callableShaderBindingTableRegion,
2161 			m_data.width, m_data.height, 1);
2162 
2163 		const VkMemoryBarrier				postTraceMemoryBarrier	= makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
2164 		const VkMemoryBarrier				postCopyMemoryBarrier	= makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
2165 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_TRANSFER_BIT, &postTraceMemoryBarrier);
2166 
2167 		vkd.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **resultBuffer, 1u, &resultBufferImageRegion);
2168 
2169 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &postCopyMemoryBarrier);
2170 	}
2171 	endCommandBuffer(vkd, *cmdBuffer);
2172 
2173 	submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
2174 
2175 	invalidateMappedMemoryRange(vkd, device, resultBuffer->getAllocation().getMemory(), resultBuffer->getAllocation().getOffset(), pixelCount * sizeof(deUint32));
2176 
2177 	return resultBuffer;
2178 }
2179 
iterateNoWorkers(void)2180 bool RayTracingASBasicTestInstance::iterateNoWorkers (void)
2181 {
2182 	// run test using arrays of pointers
2183 	const de::MovePtr<BufferWithMemory>	buffer		= runTest(0);
2184 
2185 	return m_data.testConfiguration->verifyImage(buffer.get(), m_context, m_data);
2186 }
2187 
iterateWithWorkers(void)2188 bool RayTracingASBasicTestInstance::iterateWithWorkers (void)
2189 {
2190 	de::MovePtr<BufferWithMemory>	singleThreadBufferCPU	= runTest(0);
2191 	const bool						singleThreadValidation	= m_data.testConfiguration->verifyImage(singleThreadBufferCPU.get(), m_context, m_data);
2192 
2193 	de::MovePtr<BufferWithMemory>	multiThreadBufferCPU	= runTest(m_data.workerThreadsCount);
2194 	const bool						multiThreadValidation	= m_data.testConfiguration->verifyImage(multiThreadBufferCPU.get(), m_context, m_data);
2195 
2196 	const deUint32					result					= singleThreadValidation && multiThreadValidation;
2197 
2198 	return result;
2199 }
2200 
iterate(void)2201 tcu::TestStatus RayTracingASBasicTestInstance::iterate (void)
2202 {
2203 	bool result;
2204 
2205 	if (m_data.workerThreadsCount != 0)
2206 		result = iterateWithWorkers();
2207 	else
2208 		result = iterateNoWorkers();
2209 
2210 	if (result)
2211 		return tcu::TestStatus::pass("Pass");
2212 	else
2213 		return tcu::TestStatus::fail("Fail");
2214 }
2215 
2216 // Tests dynamic indexing of acceleration structures
2217 class RayTracingASDynamicIndexingTestCase : public TestCase
2218 {
2219 public:
2220 						RayTracingASDynamicIndexingTestCase			(tcu::TestContext& context, const char* name);
2221 						~RayTracingASDynamicIndexingTestCase		(void) = default;
2222 
2223 	void				checkSupport								(Context& context) const override;
2224 	void				initPrograms								(SourceCollections& programCollection) const override;
2225 	TestInstance*		createInstance								(Context& context) const override;
2226 };
2227 
2228 class RayTracingASDynamicIndexingTestInstance : public TestInstance
2229 {
2230 public:
2231 						RayTracingASDynamicIndexingTestInstance		(Context& context);
2232 						~RayTracingASDynamicIndexingTestInstance	(void) = default;
2233 	tcu::TestStatus		iterate										(void) override;
2234 };
2235 
RayTracingASDynamicIndexingTestCase(tcu::TestContext & context,const char * name)2236 RayTracingASDynamicIndexingTestCase::RayTracingASDynamicIndexingTestCase(tcu::TestContext& context, const char* name)
2237 	: TestCase(context, name)
2238 {
2239 }
2240 
checkSupport(Context & context) const2241 void RayTracingASDynamicIndexingTestCase::checkSupport(Context& context) const
2242 {
2243 	commonASTestsCheckSupport(context);
2244 	context.requireDeviceFunctionality("VK_EXT_descriptor_indexing");
2245 }
2246 
initPrograms(SourceCollections & programCollection) const2247 void RayTracingASDynamicIndexingTestCase::initPrograms(SourceCollections& programCollection) const
2248 {
2249 	const vk::SpirVAsmBuildOptions spvBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, true);
2250 	const vk::ShaderBuildOptions glslBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
2251 
2252 	// raygen shader is defined in spir-v as it requires possing pointer to TLAS that was read from ssbo;
2253 	// original spir-v code was generated using following glsl code but resulting spir-v code was modiifed
2254 	//
2255 	// #version 460 core
2256 	// #extension GL_EXT_ray_tracing : require
2257 	// #extension GL_EXT_nonuniform_qualifier : enable
2258 	// #define ARRAY_SIZE 500
2259 	// layout(location = 0) rayPayloadEXT uvec2 payload;	// offset and flag indicating if we are using descriptors or pointers
2260 
2261 	// layout(set = 0, binding = 0) uniform accelerationStructureEXT tlasArray[ARRAY_SIZE];
2262 	// layout(set = 0, binding = 1) readonly buffer topLevelASPointers {
2263 	//     uvec2 ptr[];
2264 	// } tlasPointers;
2265 	// layout(set = 0, binding = 2) readonly buffer topLevelASIndices {
2266 	//     uint idx[];
2267 	// } tlasIndices;
2268 	// layout(set = 0, binding = 3, std430) writeonly buffer Result {
2269 	//     uint value[];
2270 	// } result;
2271 
2272 	// void main()
2273 	// {
2274 	//   float tmin            = 0.0;\n"
2275 	//   float tmax            = 2.0;\n"
2276 	//   vec3  origin          = vec3(0.25f, 0.5f, 1.0);\n"
2277 	//   vec3  direction       = vec3(0.0,0.0,-1.0);\n"
2278 	//   uint  activeTlasIndex = gl_LaunchIDEXT.x;\n"
2279 	//   uint  activeTlasCount = gl_LaunchSizeEXT.x;\n"
2280 	//   uint  tlasIndex       = tlasIndices.idx[nonuniformEXT(activeTlasIndex)];\n"
2281 
2282 	//   atomicAdd(result.value[nonuniformEXT(activeTlasIndex)], 2);\n"
2283 	//   payload = uvec2(activeTlasIndex + activeTlasCount.x, 0);\n"
2284 	//   traceRayEXT(tlasArray[nonuniformEXT(tlasIndex)], gl_RayFlagsCullBackFacingTrianglesEXT, 0xFF, 0, 0, 0, origin, tmin, direction, tmax, 0);\n"
2285 
2286 	//   atomicAdd(result.value[nonuniformEXT(activeTlasIndex + activeTlasCount * 2)], 5);\n"
2287 	//   payload = uvec2(activeTlasIndex + activeTlasCount * 3, 1);\n"
2288 	//   traceRayEXT(tlasArray[nonuniformEXT(tlasIndex)], gl_RayFlagsCullBackFacingTrianglesEXT, 0xFF, 0, 0, 0, origin, tmin, direction, tmax, 0);				// used to generate initial spirv
2289 	//   //traceRayEXT(*tlasPointers.ptr[nonuniformEXT(tlasIndex)], gl_RayFlagsCullBackFacingTrianglesEXT, 0xFF, 0, 0, 0, origin, tmin, direction, tmax, 0);	// not available in glsl but should be done in spirv
2290 	// };
2291 
2292 	const std::string rgenSource =
2293 		"OpCapability RayTracingKHR\n"
2294 		"OpCapability ShaderNonUniform\n"
2295 		"OpExtension \"SPV_EXT_descriptor_indexing\"\n"
2296 		"OpExtension \"SPV_KHR_ray_tracing\"\n"
2297 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
2298 		"OpMemoryModel Logical GLSL450\n"
2299 		"OpEntryPoint RayGenerationKHR %4 \"main\" %27 %33 %var_tlas_indices %var_result %60 %var_as_arr_ptr %var_as_pointers_ssbo\n"
2300 		"OpDecorate %27 BuiltIn LaunchIdNV\n"
2301 		"OpDecorate %33 BuiltIn LaunchSizeNV\n"
2302 		"OpDecorate %37 ArrayStride 4\n"
2303 		"OpMemberDecorate %38 0 NonWritable\n"
2304 		"OpMemberDecorate %38 0 Offset 0\n"
2305 		"OpDecorate %38 Block\n"
2306 		"OpDecorate %var_tlas_indices DescriptorSet 0\n"
2307 		"OpDecorate %var_tlas_indices Binding 2\n"
2308 		"OpDecorate %44 NonUniform\n"
2309 		"OpDecorate %46 NonUniform\n"
2310 		"OpDecorate %47 NonUniform\n"
2311 		"OpDecorate %48 ArrayStride 4\n"
2312 		"OpMemberDecorate %49 0 NonReadable\n"
2313 		"OpMemberDecorate %49 0 Offset 0\n"
2314 		"OpDecorate %49 Block\n"
2315 		"OpDecorate %var_result DescriptorSet 0\n"
2316 		"OpDecorate %var_result Binding 3\n"
2317 		"OpDecorate %53 NonUniform\n"
2318 		"OpDecorate %60 Location 0\n"
2319 		"OpDecorate %var_as_arr_ptr DescriptorSet 0\n"
2320 		"OpDecorate %var_as_arr_ptr Binding 0\n"
2321 		"OpDecorate %71 NonUniform\n"
2322 		"OpDecorate %73 NonUniform\n"
2323 		"OpDecorate %74 NonUniform\n"
2324 		"OpDecorate %85 NonUniform\n"
2325 		"OpDecorate %as_index NonUniform\n"
2326 		"OpDecorate %as_device_addres NonUniform\n"
2327 		"OpDecorate %104 ArrayStride 8\n"
2328 		"OpMemberDecorate %105 0 NonWritable\n"
2329 		"OpMemberDecorate %105 0 Offset 0\n"
2330 		"OpDecorate %105 Block\n"
2331 		"OpDecorate %var_as_pointers_ssbo DescriptorSet 0\n"
2332 		"OpDecorate %var_as_pointers_ssbo Binding 1\n"
2333 		// types, constants and variables
2334 		"%2								= OpTypeVoid\n"
2335 		"%3								= OpTypeFunction %2\n"
2336 		"%6								= OpTypeFloat 32\n"
2337 		"%7								= OpTypePointer Function %6\n"
2338 		"%9								= OpConstant %6 0\n"
2339 		"%11							= OpConstant %6 2\n"
2340 		"%12							= OpTypeVector %6 3\n"
2341 		"%13							= OpTypePointer Function %12\n"
2342 		"%15							= OpConstant %6 0.25\n"
2343 		"%16							= OpConstant %6 0.5\n"
2344 		"%17							= OpConstant %6 1\n"
2345 		"%18							= OpConstantComposite %12 %15 %16 %17\n"
2346 		"%20							= OpConstant %6 -1\n"
2347 		"%21							= OpConstantComposite %12 %9 %9 %20\n"
2348 		"%type_uint32					= OpTypeInt 32 0\n"
2349 		"%23							= OpTypePointer Function %type_uint32\n"
2350 		"%25							= OpTypeVector %type_uint32 3\n"
2351 		"%26							= OpTypePointer Input %25\n"
2352 		"%27							= OpVariable %26 Input\n"
2353 		"%28							= OpConstant %type_uint32 0\n"
2354 		"%29							= OpTypePointer Input %type_uint32\n"
2355 		"%33							= OpVariable %26 Input\n"
2356 		"%37							= OpTypeRuntimeArray %type_uint32\n"
2357 		"%38							= OpTypeStruct %37\n"
2358 		"%39							= OpTypePointer StorageBuffer %38\n"
2359 		"%var_tlas_indices				= OpVariable %39 StorageBuffer\n"
2360 		"%type_int32					= OpTypeInt 32 1\n"
2361 		"%c_int32_0						= OpConstant %type_int32 0\n"
2362 		"%45							= OpTypePointer StorageBuffer %type_uint32\n"
2363 		"%48							= OpTypeRuntimeArray %type_uint32\n"
2364 		"%49							= OpTypeStruct %48\n"
2365 		"%50							= OpTypePointer StorageBuffer %49\n"
2366 		"%var_result					= OpVariable %50 StorageBuffer\n"
2367 		"%55							= OpConstant %type_uint32 2\n"
2368 		"%56							= OpConstant %type_uint32 1\n"
2369 		"%58							= OpTypeVector %type_uint32 2\n"
2370 		"%59							= OpTypePointer RayPayloadNV %58\n"
2371 		"%60							= OpVariable %59 RayPayloadNV\n"
2372 		"%type_as						= OpTypeAccelerationStructureKHR\n"
2373 		"%66							= OpConstant %type_uint32 500\n"
2374 		"%67							= OpTypeArray %type_as %66\n"
2375 		"%68							= OpTypePointer UniformConstant %67\n"
2376 		"%var_as_arr_ptr				= OpVariable %68 UniformConstant\n"
2377 		"%72							= OpTypePointer UniformConstant %type_as\n"
2378 		"%75							= OpConstant %type_uint32 16\n"
2379 		"%76							= OpConstant %type_uint32 255\n"
2380 		"%87							= OpConstant %type_uint32 5\n"
2381 		"%91							= OpConstant %type_uint32 3\n"
2382 
2383 		// <changed_section>
2384 		"%104							= OpTypeRuntimeArray %58\n"
2385 		"%105							= OpTypeStruct %104\n"
2386 		"%106							= OpTypePointer StorageBuffer %105\n"
2387 		"%var_as_pointers_ssbo			= OpVariable %106 StorageBuffer\n"
2388 		"%type_uint64_ssbo_ptr			= OpTypePointer StorageBuffer %58\n"
2389 		// </changed_section>
2390 
2391 		// void main()
2392 		"%4								= OpFunction %2 None %3\n"
2393 		"%5								= OpLabel\n"
2394 		"%8								= OpVariable %7 Function\n"
2395 		"%10							= OpVariable %7 Function\n"
2396 		"%14							= OpVariable %13 Function\n"
2397 		"%19							= OpVariable %13 Function\n"
2398 		"%24							= OpVariable %23 Function\n"
2399 		"%32							= OpVariable %23 Function\n"
2400 		"%36							= OpVariable %23 Function\n"
2401 		"OpStore %8 %9\n"
2402 		"OpStore %10 %11\n"
2403 		"OpStore %14 %18\n"
2404 		"OpStore %19 %21\n"
2405 		"%30							= OpAccessChain %29 %27 %28\n"
2406 		"%31							= OpLoad %type_uint32 %30\n"
2407 		"OpStore %24 %31\n"
2408 		"%34							= OpAccessChain %29 %33 %28\n"
2409 		"%35							= OpLoad %type_uint32 %34\n"
2410 		"OpStore %32 %35\n"
2411 		"%43							= OpLoad %type_uint32 %24\n"
2412 		"%44							= OpCopyObject %type_uint32 %43\n"
2413 		"%46							= OpAccessChain %45 %var_tlas_indices %c_int32_0 %44\n"
2414 		"%47							= OpLoad %type_uint32 %46\n"
2415 		"OpStore %36 %47\n"
2416 		// atomicAdd
2417 		"%52							= OpLoad %type_uint32 %24\n"
2418 		"%53							= OpCopyObject %type_uint32 %52\n"
2419 		"%54							= OpAccessChain %45 %var_result %c_int32_0 %53\n"
2420 		"%57							= OpAtomicIAdd %type_uint32 %54 %56 %28 %55\n"
2421 		// setup payload
2422 		"%61							= OpLoad %type_uint32 %24\n"
2423 		"%62							= OpLoad %type_uint32 %32\n"
2424 		"%63							= OpIAdd %type_uint32 %61 %62\n"
2425 		"%64							= OpCompositeConstruct %58 %63 %28\n"
2426 		"OpStore %60 %64\n"
2427 		// trace rays using tlas from array
2428 		"%70							= OpLoad %type_uint32 %36\n"
2429 		"%71							= OpCopyObject %type_uint32 %70\n"
2430 		"%73							= OpAccessChain %72 %var_as_arr_ptr %71\n"
2431 		"%74							= OpLoad %type_as %73\n"
2432 		"%77							= OpLoad %12 %14\n"
2433 		"%78							= OpLoad %6 %8\n"
2434 		"%79							= OpLoad %12 %19\n"
2435 		"%80							= OpLoad %6 %10\n"
2436 		"OpTraceRayKHR %74 %75 %76 %28 %28 %28 %77 %78 %79 %80 %60\n"
2437 		// atomicAdd
2438 		"%81							= OpLoad %type_uint32 %24\n"
2439 		"%82							= OpLoad %type_uint32 %32\n"
2440 		"%83							= OpIMul %type_uint32 %82 %55\n"
2441 		"%84							= OpIAdd %type_uint32 %81 %83\n"
2442 		"%85							= OpCopyObject %type_uint32 %84\n"
2443 		"%86							= OpAccessChain %45 %var_result %c_int32_0 %85\n"
2444 		"%88							= OpAtomicIAdd %type_uint32 %86 %56 %28 %87\n"
2445 		// setup payload
2446 		"%89							= OpLoad %type_uint32 %24\n"
2447 		"%90							= OpLoad %type_uint32 %32\n"
2448 		"%92							= OpIMul %type_uint32 %90 %91\n"
2449 		"%93							= OpIAdd %type_uint32 %89 %92\n"
2450 		"%94							= OpCompositeConstruct %58 %93 %56\n"
2451 		"OpStore %60 %94\n"
2452 		// trace rays using pointers to tlas
2453 		"%95							= OpLoad %type_uint32 %36\n"
2454 		"%as_index						= OpCopyObject %type_uint32 %95\n"
2455 
2456 		// <changed_section> OLD
2457 		"%as_device_addres_ptr			= OpAccessChain %type_uint64_ssbo_ptr %var_as_pointers_ssbo %c_int32_0 %as_index\n"
2458 		"%as_device_addres				= OpLoad %58 %as_device_addres_ptr\n"
2459 		"%as_to_use						= OpConvertUToAccelerationStructureKHR %type_as %as_device_addres\n"
2460 		// </changed_section>
2461 
2462 		"%99							= OpLoad %12 %14\n"
2463 		"%100							= OpLoad %6 %8\n"
2464 		"%101							= OpLoad %12 %19\n"
2465 		"%102							= OpLoad %6 %10\n"
2466 		"OpTraceRayKHR %as_to_use %75 %76 %28 %28 %28 %99 %100 %101 %102 %60\n"
2467 		"OpReturn\n"
2468 		"OpFunctionEnd\n";
2469 	programCollection.spirvAsmSources.add("rgen") << rgenSource << spvBuildOptions;
2470 
2471 	std::string chitSource =
2472 		"#version 460 core\n"
2473 		"#extension GL_EXT_ray_tracing : require\n"
2474 		"#extension GL_EXT_nonuniform_qualifier : enable\n"
2475 		"layout(location = 0) rayPayloadInEXT uvec2 payload;\n"
2476 		"\n"
2477 		"layout(set = 0, binding = 3) writeonly buffer Result {\n"
2478 		"    uint value[];\n"
2479 		"} result;\n"
2480 		"void main()\n"
2481 		"{\n"
2482 		     // payload.y is 0 or 1 so we will add 3 or 7 (just two prime numbers)
2483 		"    atomicAdd(result.value[nonuniformEXT(payload.x)], 3 + payload.y * 4);\n"
2484 		"}\n";
2485 	programCollection.glslSources.add("chit") << glu::ClosestHitSource(chitSource) << glslBuildOptions;
2486 }
2487 
createInstance(Context & context) const2488 TestInstance* RayTracingASDynamicIndexingTestCase::createInstance(Context& context) const
2489 {
2490 	return new RayTracingASDynamicIndexingTestInstance(context);
2491 }
2492 
RayTracingASDynamicIndexingTestInstance(Context & context)2493 RayTracingASDynamicIndexingTestInstance::RayTracingASDynamicIndexingTestInstance(Context& context)
2494 	: vkt::TestInstance(context)
2495 {
2496 }
2497 
iterate(void)2498 tcu::TestStatus RayTracingASDynamicIndexingTestInstance::iterate(void)
2499 {
2500 	const InstanceInterface&	vki							= m_context.getInstanceInterface();
2501 	const DeviceInterface&		vkd							= m_context.getDeviceInterface();
2502 	const VkDevice				device						= m_context.getDevice();
2503 	const VkPhysicalDevice		physicalDevice				= m_context.getPhysicalDevice();
2504 	const deUint32				queueFamilyIndex			= m_context.getUniversalQueueFamilyIndex();
2505 	const VkQueue				queue						= m_context.getUniversalQueue();
2506 	Allocator&					allocator					= m_context.getDefaultAllocator();
2507 	const deUint32				shaderGroupHandleSize		= getShaderGroupSize(vki, physicalDevice);
2508 	const deUint32				shaderGroupBaseAlignment	= getShaderGroupBaseAlignment(vki, physicalDevice);
2509 	const deUint32				tlasCount					= 500;	// changing this will require also changing shaders
2510 	const deUint32				activeTlasCount				= 32;	// number of tlas out of <tlasCount> that will be active
2511 
2512 	const Move<VkDescriptorSetLayout> descriptorSetLayout = DescriptorSetLayoutBuilder()
2513 		.addArrayBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, tlasCount, ALL_RAY_TRACING_STAGES)
2514 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, ALL_RAY_TRACING_STAGES)				// pointers to all acceleration structures
2515 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, ALL_RAY_TRACING_STAGES)				// ssbo with indices of all acceleration structures
2516 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, ALL_RAY_TRACING_STAGES)				// ssbo with result values
2517 		.build(vkd, device);
2518 
2519 	const Move<VkDescriptorPool> descriptorPool = DescriptorPoolBuilder()
2520 		.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, tlasCount)
2521 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2522 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2523 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2524 		.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2525 	const Move<VkDescriptorSet> descriptorSet = makeDescriptorSet(vkd, device, *descriptorPool, *descriptorSetLayout);
2526 
2527 	de::MovePtr<RayTracingPipeline> rayTracingPipeline = de::newMovePtr<RayTracingPipeline>();
2528 	rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,      createShaderModule(vkd, device, m_context.getBinaryCollection().get("rgen"), 0), 0);
2529 	rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, createShaderModule(vkd, device, m_context.getBinaryCollection().get("chit"), 0), 1);
2530 
2531 	const Move<VkPipelineLayout>			pipelineLayout						= makePipelineLayout(vkd, device, descriptorSetLayout.get());
2532 	Move<VkPipeline>						pipeline							= rayTracingPipeline->createPipeline(vkd, device, *pipelineLayout);
2533 	de::MovePtr<BufferWithMemory>			raygenShaderBindingTable			= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1);
2534 	de::MovePtr<BufferWithMemory>			hitShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1);
2535 
2536 	const VkStridedDeviceAddressRegionKHR	raygenShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, raygenShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
2537 	const VkStridedDeviceAddressRegionKHR	missShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
2538 	const VkStridedDeviceAddressRegionKHR	hitShaderBindingTableRegion			= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
2539 	const VkStridedDeviceAddressRegionKHR	callableShaderBindingTableRegion	= makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
2540 
2541 	const VkDeviceSize						pointerBufferSize		= tlasCount * sizeof(VkDeviceAddress);
2542 	const VkBufferCreateInfo				pointerBufferCreateInfo	= makeBufferCreateInfo(pointerBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2543 	de::MovePtr<BufferWithMemory>			pointerBuffer			= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, pointerBufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::DeviceAddress));
2544 
2545 	const VkDeviceSize						indicesBufferSize		= activeTlasCount * sizeof(deUint32);
2546 	const VkBufferCreateInfo				indicesBufferCreateInfo	= makeBufferCreateInfo(indicesBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2547 	de::MovePtr<BufferWithMemory>			indicesBuffer			= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, indicesBufferCreateInfo, MemoryRequirement::HostVisible));
2548 
2549 	const VkDeviceSize						resultBufferSize		= activeTlasCount * sizeof(deUint32) * 4;
2550 	const VkBufferCreateInfo				resultBufferCreateInfo	= makeBufferCreateInfo(resultBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
2551 	de::MovePtr<BufferWithMemory>			resultBuffer			= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible));
2552 
2553 	const Move<VkCommandPool>				cmdPool					= createCommandPool(vkd, device, 0, queueFamilyIndex);
2554 	const Move<VkCommandBuffer>				cmdBuffer				= allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2555 
2556 	de::SharedPtr<BottomLevelAccelerationStructure>				blas = de::SharedPtr<BottomLevelAccelerationStructure>(makeBottomLevelAccelerationStructure().release());
2557 	std::vector<de::MovePtr<TopLevelAccelerationStructure>>		tlasVect(tlasCount);
2558 	std::vector<VkDeviceAddress>								tlasPtrVect(tlasCount);
2559 	std::vector<VkAccelerationStructureKHR>						tlasVkVect;
2560 
2561 	// randomly scatter active AS across the range
2562 	deRandom rnd;
2563 	deRandom_init(&rnd, 123);
2564 	std::set<deUint32> asIndicesSet;
2565 	while (asIndicesSet.size() < activeTlasCount)
2566 		asIndicesSet.insert(deRandom_getUint32(&rnd) % tlasCount);
2567 
2568 	// fill indices buffer
2569 	deUint32 helperIndex = 0;
2570 	auto& indicesBufferAlloc	= indicesBuffer->getAllocation();
2571 	deUint32* indicesBufferPtr	= reinterpret_cast<deUint32*>(indicesBufferAlloc.getHostPtr());
2572 	std::for_each(asIndicesSet.begin(), asIndicesSet.end(),
2573 		[&helperIndex, indicesBufferPtr](const deUint32& index)
2574 		{
2575 			indicesBufferPtr[helperIndex++] = index;
2576 		});
2577 	vk::flushAlloc(vkd, device, indicesBufferAlloc);
2578 
2579 	// clear result buffer
2580 	auto& resultBufferAlloc		= resultBuffer->getAllocation();
2581 	void* resultBufferPtr		= resultBufferAlloc.getHostPtr();
2582 	deMemset(resultBufferPtr, 0, static_cast<size_t>(resultBufferSize));
2583 	vk::flushAlloc(vkd, device, resultBufferAlloc);
2584 
2585 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
2586 	{
2587 		// build bottom level acceleration structure
2588 		blas->setGeometryData(
2589 			{
2590 				{ 0.0, 0.0, 0.0 },
2591 				{ 1.0, 0.0, 0.0 },
2592 				{ 0.0, 1.0, 0.0 },
2593 			},
2594 			true,
2595 			VK_GEOMETRY_OPAQUE_BIT_KHR
2596 		);
2597 
2598 		blas->createAndBuild(vkd, device, *cmdBuffer, allocator);
2599 
2600 		// build top level acceleration structures
2601 		for (deUint32 tlasIndex = 0; tlasIndex < tlasCount; ++tlasIndex)
2602 		{
2603 			auto& tlas = tlasVect[tlasIndex];
2604 			tlas = makeTopLevelAccelerationStructure();
2605 			tlas->setInstanceCount(1);
2606 			tlas->addInstance(blas);
2607 			if (!asIndicesSet.count(tlasIndex))
2608 			{
2609 				// tlas that are not in asIndicesSet should be empty but it is hard to do
2610 				// that with current cts utils so we are marking them as inactive instead
2611 				tlas->setInactiveInstances(true);
2612 			}
2613 			tlas->createAndBuild(vkd, device, *cmdBuffer, allocator);
2614 
2615 			// get acceleration structure device address
2616 			const VkAccelerationStructureDeviceAddressInfoKHR addressInfo =
2617 			{
2618 				VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,	// VkStructureType				sType
2619 				DE_NULL,															// const void*					pNext
2620 				*tlas->getPtr()														// VkAccelerationStructureKHR	accelerationStructure
2621 			};
2622 			VkDeviceAddress vkda = vkd.getAccelerationStructureDeviceAddressKHR(device, &addressInfo);
2623 			tlasPtrVect[tlasIndex] = vkda;
2624 		}
2625 
2626 		// fill pointer buffer
2627 		vkd.cmdUpdateBuffer(*cmdBuffer, **pointerBuffer, 0, pointerBufferSize, tlasPtrVect.data());
2628 
2629 		// wait for data transfers
2630 		const VkMemoryBarrier bufferUploadBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
2631 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, &bufferUploadBarrier, 1u);
2632 
2633 		// wait for as build
2634 		const VkMemoryBarrier asBuildBarrier = makeMemoryBarrier(VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR);
2635 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, &asBuildBarrier, 1u);
2636 
2637 		tlasVkVect.reserve(tlasCount);
2638 		for (auto& tlas : tlasVect)
2639 			tlasVkVect.push_back(*tlas->getPtr());
2640 
2641 		VkWriteDescriptorSetAccelerationStructureKHR accelerationStructureWriteDescriptorSet =
2642 		{
2643 			VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	// VkStructureType						sType;
2644 			DE_NULL,															// const void*							pNext;
2645 			tlasCount,															// deUint32								accelerationStructureCount;
2646 			tlasVkVect.data(),													// const VkAccelerationStructureKHR*	pAccelerationStructures;
2647 		};
2648 
2649 		const vk::VkDescriptorBufferInfo pointerBufferInfo	= makeDescriptorBufferInfo(**pointerBuffer, 0u, VK_WHOLE_SIZE);
2650 		const vk::VkDescriptorBufferInfo indicesBufferInfo	= makeDescriptorBufferInfo(**indicesBuffer, 0u, VK_WHOLE_SIZE);
2651 		const vk::VkDescriptorBufferInfo resultInfo			= makeDescriptorBufferInfo(**resultBuffer,  0u, VK_WHOLE_SIZE);
2652 
2653 		DescriptorSetUpdateBuilder()
2654 			.writeArray (*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, tlasCount, &accelerationStructureWriteDescriptorSet)
2655 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &pointerBufferInfo)
2656 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indicesBufferInfo)
2657 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(3u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultInfo)
2658 			.update(vkd, device);
2659 
2660 		vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL);
2661 
2662 		vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
2663 
2664 		cmdTraceRays(vkd,
2665 			*cmdBuffer,
2666 			&raygenShaderBindingTableRegion,
2667 			&missShaderBindingTableRegion,
2668 			&hitShaderBindingTableRegion,
2669 			&callableShaderBindingTableRegion,
2670 			activeTlasCount, 1, 1);
2671 
2672 		const VkMemoryBarrier postTraceMemoryBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
2673 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_TRANSFER_BIT, &postTraceMemoryBarrier);
2674 	}
2675 	endCommandBuffer(vkd, *cmdBuffer);
2676 
2677 	submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
2678 
2679 	invalidateMappedMemoryRange(vkd, device, resultBuffer->getAllocation().getMemory(), resultBuffer->getAllocation().getOffset(), resultBufferSize);
2680 
2681 	// verify result buffer
2682 	deUint32		failures	= 0;
2683 	const deUint32*	resultPtr	= reinterpret_cast<deUint32*>(resultBuffer->getAllocation().getHostPtr());
2684 	for (deUint32 index = 0; index < activeTlasCount; ++index)
2685 	{
2686 		failures += (resultPtr[0 * activeTlasCount + index] != 2) +
2687 					(resultPtr[1 * activeTlasCount + index] != 3) +
2688 					(resultPtr[2 * activeTlasCount + index] != 5) +
2689 					(resultPtr[3 * activeTlasCount + index] != 7);
2690 	}
2691 
2692 	if (failures)
2693 		return tcu::TestStatus::fail(de::toString(failures) + " failures, " + de::toString(4 * activeTlasCount - failures) + " are ok");
2694 	return tcu::TestStatus::pass("Pass");
2695 }
2696 
2697 // Tests the vkGetDeviceAccelerationStructureKHR routine
2698 class RayTracingDeviceASCompabilityKHRTestInstance : public TestInstance
2699 {
2700 public:
RayTracingDeviceASCompabilityKHRTestInstance(Context & context,const de::SharedPtr<TestParams> params)2701 					RayTracingDeviceASCompabilityKHRTestInstance	(Context& context, const de::SharedPtr<TestParams> params)
2702 						: TestInstance	(context)
2703 						, m_params		(params)
2704 					{
2705 					}
2706 
2707 	tcu::TestStatus	iterate											(void) override;
2708 
2709 protected:
2710 	template<class ASType>
2711 		bool		performTest										(VkCommandPool								cmdPool,
2712 																	 VkCommandBuffer							cmdBuffer,
2713 																	 const std::vector<de::SharedPtr<ASType>>	sourceStructures,
2714 																	 const std::vector<VkDeviceSize>&			copySizes,
2715 																	 const std::vector<VkDeviceSize>&			compactSizes);
2716 
2717 	VkAccelerationStructureCompatibilityKHR
2718 					getDeviceASCompatibilityKHR						(const deUint8*		versionInfoData);
2719 	std::string		getUUIDsString									(const deUint8* header) const;
2720 
2721 
2722 private:
2723 	const de::SharedPtr<TestParams>	m_params;
2724 };
2725 
2726 // Tests for updating botto-level AS(s) address(es) in top-level AS's header
2727 class RayTracingHeaderBottomAddressTestInstance : public TestInstance
2728 {
2729 public:
RayTracingHeaderBottomAddressTestInstance(Context & context,const de::SharedPtr<TestParams> params)2730 					RayTracingHeaderBottomAddressTestInstance						(Context&											context,
2731 																					 const de::SharedPtr<TestParams>					params)
2732 						: TestInstance	(context)
2733 						, m_params		(params)
2734 					{
2735 					}
2736 	tcu::TestStatus	iterate															(void) override;
2737 
2738 protected:
2739 	de::SharedPtr<TopLevelAccelerationStructure>	prepareTopAccelerationStructure	(const DeviceInterface&								vk,
2740 																					 VkDevice											device,
2741 																					 Allocator&											allocator,
2742 																					 VkCommandBuffer									cmdBuffer);
2743 
2744 	bool											areAddressesTheSame				(const std::vector<deUint64>&						addresses,
2745 																					 const SerialStorage::AccelerationStructureHeader*	header);
2746 
2747 	bool											areAddressesDifferent			(const std::vector<deUint64>&						addresses1,
2748 																					 const std::vector<deUint64>&						addresses2);
2749 private:
2750 	const de::SharedPtr<TestParams>	m_params;
2751 };
2752 
2753 class RayTracingDeviceASCompabilityKHRTestCase : public TestCase
2754 {
2755 public:
RayTracingDeviceASCompabilityKHRTestCase(tcu::TestContext & ctx,const char * name,const de::SharedPtr<TestParams> params)2756 					RayTracingDeviceASCompabilityKHRTestCase	(tcu::TestContext& ctx, const char* name, const de::SharedPtr<TestParams> params)
2757 						: TestCase(ctx, name)
2758 						, m_params(params)
2759 					{
2760 					}
2761 
2762 	void			checkSupport								(Context&			context) const override;
createInstance(Context & context) const2763 	TestInstance*	createInstance								(Context&			context) const override
2764 	{
2765 		return new RayTracingDeviceASCompabilityKHRTestInstance(context, m_params);
2766 	}
2767 
2768 private:
2769 	de::SharedPtr<TestParams>	m_params;
2770 };
2771 
2772 class RayTracingHeaderBottomAddressTestCase : public TestCase
2773 {
2774 public:
RayTracingHeaderBottomAddressTestCase(tcu::TestContext & ctx,const char * name,const de::SharedPtr<TestParams> params)2775 					RayTracingHeaderBottomAddressTestCase	(tcu::TestContext& ctx, const char* name, const de::SharedPtr<TestParams> params)
2776 						: TestCase(ctx, name)
2777 						, m_params(params)
2778 					{
2779 					}
2780 
2781 	void			checkSupport								(Context&			context) const override;
createInstance(Context & context) const2782 	TestInstance*	createInstance								(Context&			context) const override
2783 	{
2784 		return new RayTracingHeaderBottomAddressTestInstance(context, m_params);
2785 	}
2786 
2787 private:
2788 	de::SharedPtr<TestParams>	m_params;
2789 };
2790 
checkSupport(Context & context) const2791 void RayTracingDeviceASCompabilityKHRTestCase ::checkSupport (Context& context) const
2792 {
2793 	context.requireInstanceFunctionality("VK_KHR_get_physical_device_properties2");
2794 	context.requireDeviceFunctionality("VK_KHR_acceleration_structure");
2795 
2796 	const VkPhysicalDeviceAccelerationStructureFeaturesKHR&	accelerationStructureFeaturesKHR = context.getAccelerationStructureFeatures();
2797 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR && accelerationStructureFeaturesKHR.accelerationStructureHostCommands == DE_FALSE)
2798 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructureHostCommands");
2799 
2800 	// Check supported vertex format.
2801 	checkAccelerationStructureVertexBufferFormat(context.getInstanceInterface(), context.getPhysicalDevice(), m_params->vertexFormat);
2802 }
2803 
checkSupport(Context & context) const2804 void RayTracingHeaderBottomAddressTestCase ::checkSupport (Context& context) const
2805 {
2806 	context.requireDeviceFunctionality("VK_KHR_acceleration_structure");
2807 
2808 	const VkPhysicalDeviceAccelerationStructureFeaturesKHR&	accelerationStructureFeaturesKHR = context.getAccelerationStructureFeatures();
2809 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR && accelerationStructureFeaturesKHR.accelerationStructureHostCommands == DE_FALSE)
2810 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructureHostCommands");
2811 
2812 	// Check supported vertex format.
2813 	checkAccelerationStructureVertexBufferFormat(context.getInstanceInterface(), context.getPhysicalDevice(), m_params->vertexFormat);
2814 }
2815 
getDeviceASCompatibilityKHR(const deUint8 * versionInfoData)2816 VkAccelerationStructureCompatibilityKHR	RayTracingDeviceASCompabilityKHRTestInstance::getDeviceASCompatibilityKHR (const deUint8* versionInfoData)
2817 {
2818 	const VkDevice								device		= m_context.getDevice();
2819 	const DeviceInterface&						vkd			= m_context.getDeviceInterface();
2820 
2821 	VkAccelerationStructureCompatibilityKHR		compability = VK_ACCELERATION_STRUCTURE_COMPATIBILITY_MAX_ENUM_KHR;
2822 
2823 	const VkAccelerationStructureVersionInfoKHR versionInfo =
2824 	{
2825 		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_VERSION_INFO_KHR,	// sType
2826 		DE_NULL,													// pNext
2827 		versionInfoData												// pVersionData
2828 	};
2829 
2830 	vkd.getDeviceAccelerationStructureCompatibilityKHR(device, &versionInfo, &compability);
2831 
2832 	return compability;
2833 }
2834 
getUUIDsString(const deUint8 * header) const2835 std::string RayTracingDeviceASCompabilityKHRTestInstance::getUUIDsString (const deUint8* header) const
2836 {
2837 	std::stringstream		ss;
2838 
2839 	int			offset		= 0;
2840 	const int	widths[]	= { 4, 2, 2, 2, 6 };
2841 
2842 	for (int h = 0; h < 2; ++h)
2843 	{
2844 		if (h) ss << ' ';
2845 
2846 		for (int w = 0; w < DE_LENGTH_OF_ARRAY(widths); ++w)
2847 		{
2848 			if (w) ss << '-';
2849 
2850 			for (int i = 0; i < widths[w]; ++i)
2851 				ss << std::hex << std::uppercase << static_cast<int>(header[i + offset]);
2852 
2853 			offset += widths[w];
2854 		}
2855 	}
2856 
2857 	return ss.str();
2858 }
2859 
iterate(void)2860 tcu::TestStatus RayTracingDeviceASCompabilityKHRTestInstance::iterate (void)
2861 {
2862 	const DeviceInterface&			vkd					= m_context.getDeviceInterface();
2863 	const VkDevice					device				= m_context.getDevice();
2864 	const deUint32					queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
2865 	const VkQueue					queue				= m_context.getUniversalQueue();
2866 	Allocator&						allocator			= m_context.getDefaultAllocator();
2867 
2868 	const Move<VkCommandPool>		cmdPool				= createCommandPool(vkd, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
2869 	const Move<VkCommandBuffer>		cmdBuffer			= allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2870 
2871 	bool							result				= false;
2872 
2873 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	bottomStructures;
2874 	std::vector<VkAccelerationStructureKHR>							bottomHandles;
2875 	std::vector<de::SharedPtr<TopLevelAccelerationStructure>>		topStructures;
2876 	std::vector<VkAccelerationStructureKHR>							topHandles;
2877 	Move<VkQueryPool>												queryPoolCompact;
2878 	Move<VkQueryPool>												queryPoolSerial;
2879 	std::vector<VkDeviceSize>										compactSizes;
2880 	std::vector<VkDeviceSize>										serialSizes;
2881 
2882 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
2883 
2884 	bottomStructures = m_params->testConfiguration->initBottomAccelerationStructures(m_context, *m_params);
2885 	for (auto& blas : bottomStructures)
2886 	{
2887 		blas->setBuildType(m_params->buildType);
2888 		blas->setBuildFlags(VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR);
2889 		blas->createAndBuild(vkd, device, *cmdBuffer, allocator);
2890 		bottomHandles.push_back(*(blas->getPtr()));
2891 	}
2892 
2893 	if (m_params->operationTarget == OT_TOP_ACCELERATION)
2894 	{
2895 		de::MovePtr<TopLevelAccelerationStructure> tlas = m_params->testConfiguration->initTopAccelerationStructure(m_context, *m_params, bottomStructures);
2896 		tlas->setBuildType					(m_params->buildType);
2897 		tlas->setBuildFlags				(VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR);
2898 		tlas->createAndBuild				(vkd, device, *cmdBuffer, allocator);
2899 		topHandles.push_back							(*(tlas->getPtr()));
2900 		topStructures.push_back(de::SharedPtr<TopLevelAccelerationStructure>(tlas.release()));
2901 	}
2902 
2903 	const deUint32 queryCount = deUint32((m_params->operationTarget == OT_BOTTOM_ACCELERATION) ? bottomStructures.size() : topStructures.size());
2904 	const std::vector<VkAccelerationStructureKHR>& handles = (m_params->operationTarget == OT_BOTTOM_ACCELERATION) ? bottomHandles : topHandles;
2905 
2906 	// query compact size
2907 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2908 		queryPoolCompact = makeQueryPool(vkd, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, queryCount);
2909 	queryAccelerationStructureSize(vkd, device, *cmdBuffer, handles, m_params->buildType, *queryPoolCompact, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, 0u, compactSizes);
2910 
2911 	// query serialization size
2912 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2913 		queryPoolSerial = makeQueryPool(vkd, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, queryCount);
2914 	queryAccelerationStructureSize(vkd, device, *cmdBuffer, handles, m_params->buildType, queryPoolSerial.get(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, serialSizes);
2915 
2916 	endCommandBuffer(vkd, *cmdBuffer);
2917 	submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
2918 
2919 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2920 	{
2921 		VK_CHECK(vkd.getQueryPoolResults(device, *queryPoolCompact, 0u, queryCount, queryCount * sizeof(VkDeviceSize), compactSizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
2922 		VK_CHECK(vkd.getQueryPoolResults(device, *queryPoolSerial, 0u, queryCount, queryCount * sizeof(VkDeviceSize), serialSizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
2923 
2924 		vkd.resetCommandPool(device, *cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
2925 	}
2926 
2927 	if (m_params->operationTarget == OT_BOTTOM_ACCELERATION)
2928 		result = performTest<BottomLevelAccelerationStructure>(*cmdPool, *cmdBuffer, bottomStructures, compactSizes, serialSizes);
2929 	else
2930 		result = performTest<TopLevelAccelerationStructure>(*cmdPool, *cmdBuffer, topStructures, compactSizes, serialSizes);
2931 
2932 	return result ? tcu::TestStatus::pass("") : tcu::TestStatus::fail("");
2933 }
2934 
2935 template<class ASType>
performTest(VkCommandPool cmdPool,VkCommandBuffer cmdBuffer,const std::vector<de::SharedPtr<ASType>> sourceStructures,const std::vector<VkDeviceSize> & compactSizes,const std::vector<VkDeviceSize> & serialSizes)2936 bool RayTracingDeviceASCompabilityKHRTestInstance::performTest (VkCommandPool								cmdPool,
2937 																VkCommandBuffer								cmdBuffer,
2938 																const std::vector<de::SharedPtr<ASType>>	sourceStructures,
2939 																const std::vector<VkDeviceSize>&			compactSizes,
2940 																const std::vector<VkDeviceSize>&			serialSizes)
2941 {
2942 	const VkQueue								queue					= m_context.getUniversalQueue();
2943 	const VkDevice								device					= m_context.getDevice();
2944 	const DeviceInterface&						vkd						= m_context.getDeviceInterface();
2945 	Allocator&									allocator				= m_context.getDefaultAllocator();
2946 
2947 	const deUint32								sourceStructuresCount	= deUint32(sourceStructures.size());
2948 
2949 	Move<VkQueryPool>							queryPoolCompactSerial;
2950 	std::vector<VkDeviceSize>					compactSerialSizes;
2951 
2952 	std::vector<VkAccelerationStructureKHR>		compactHandles;
2953 	std::vector<de::SharedPtr<ASType>>			compactStructures;
2954 
2955 	std::vector<de::SharedPtr<SerialStorage>>	sourceSerialized;
2956 	std::vector<de::SharedPtr<SerialStorage>>	compactSerialized;
2957 
2958 
2959 	// make compact copy of acceleration structure
2960 	{
2961 		beginCommandBuffer(vkd, cmdBuffer, 0u);
2962 
2963 		for (size_t i = 0; i < sourceStructuresCount; ++i)
2964 		{
2965 			de::MovePtr<ASType> asCopy = makeAccelerationStructure<ASType>();
2966 			asCopy->setBuildType(m_params->buildType);
2967 			asCopy->createAndCopyFrom(vkd, device, cmdBuffer, allocator, sourceStructures[i].get(), compactSizes[i], 0u);
2968 			compactHandles.push_back(*(asCopy->getPtr()));
2969 			compactStructures.push_back(de::SharedPtr<ASType>(asCopy.release()));
2970 		}
2971 
2972 		// query serialization size of compact acceleration structures
2973 		if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2974 			queryPoolCompactSerial = makeQueryPool(vkd, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, sourceStructuresCount);
2975 		queryAccelerationStructureSize(vkd, device, cmdBuffer, compactHandles, m_params->buildType, *queryPoolCompactSerial, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, compactSerialSizes);
2976 
2977 		endCommandBuffer(vkd, cmdBuffer);
2978 		submitCommandsAndWait(vkd, device, queue, cmdBuffer);
2979 
2980 		if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2981 		{
2982 			VK_CHECK(vkd.getQueryPoolResults(device, *queryPoolCompactSerial, 0u, sourceStructuresCount, (sourceStructuresCount * sizeof(VkDeviceSize)), compactSerialSizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
2983 			vkd.resetCommandPool(device, cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
2984 		}
2985 	}
2986 
2987 	// serialize both structures to memory
2988 	{
2989 		beginCommandBuffer(vkd, cmdBuffer, 0u);
2990 
2991 		for (size_t i = 0 ; i < sourceStructuresCount; ++i)
2992 		{
2993 			sourceSerialized.push_back(de::SharedPtr<SerialStorage>(new SerialStorage(vkd, device, allocator, m_params->buildType, serialSizes[i])));
2994 			sourceStructures[i]->serialize(vkd, device, cmdBuffer, sourceSerialized.back().get());
2995 
2996 			compactSerialized.push_back(de::SharedPtr<SerialStorage>(new SerialStorage(vkd, device, allocator, m_params->buildType, compactSerialSizes[i])));
2997 			compactStructures[i]->serialize(vkd, device, cmdBuffer, compactSerialized.back().get());
2998 		}
2999 
3000 		endCommandBuffer(vkd, cmdBuffer);
3001 		submitCommandsAndWait(vkd, device, queue, cmdBuffer);
3002 	}
3003 
3004 	// verify compatibility
3005 	bool result = true;
3006 	for (size_t i = 0; result && (i < sourceStructuresCount); ++i)
3007 	{
3008 		const deUint8* s_header = static_cast<const deUint8*>(sourceSerialized[i]->getHostAddressConst().hostAddress);
3009 		const deUint8* c_header = static_cast<const deUint8*>(compactSerialized[i]->getHostAddressConst().hostAddress);
3010 
3011 		const auto s_compability = getDeviceASCompatibilityKHR(s_header);
3012 		const auto c_compability = getDeviceASCompatibilityKHR(c_header);
3013 
3014 		result &= ((s_compability == c_compability) && (s_compability == VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR));
3015 
3016 		if (!result)
3017 		{
3018 			tcu::TestLog& log = m_context.getTestContext().getLog();
3019 
3020 			log << tcu::TestLog::Message << getUUIDsString(s_header) << " serialized AS compability failed" << tcu::TestLog::EndMessage;
3021 			log << tcu::TestLog::Message << getUUIDsString(c_header) << " compact AS compability failed" << tcu::TestLog::EndMessage;
3022 		}
3023 	}
3024 
3025 	return result;
3026 }
3027 
3028 de::SharedPtr<TopLevelAccelerationStructure>
prepareTopAccelerationStructure(const DeviceInterface & vk,VkDevice device,Allocator & allocator,VkCommandBuffer cmdBuffer)3029 RayTracingHeaderBottomAddressTestInstance::prepareTopAccelerationStructure (const DeviceInterface&	vk,
3030 																			VkDevice				device,
3031 																			Allocator&				allocator,
3032 																			VkCommandBuffer			cmdBuffer)
3033 {
3034 	const std::vector<tcu::Vec3>									geometryData =
3035 	{
3036 		{ 0.0, 0.0, 0.0 },
3037 		{ 1.0, 0.0, 0.0 },
3038 		{ 0.0, 1.0, 0.0 },
3039 	};
3040 
3041 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	bottoms;
3042 
3043 	if (TopTestType::IDENTICAL_INSTANCES == m_params->topTestType)
3044 	{
3045 		auto blas = de::SharedPtr<BottomLevelAccelerationStructure>(makeBottomLevelAccelerationStructure().release());
3046 		blas->setBuildType(m_params->buildType);
3047 		blas->setGeometryData(geometryData, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
3048 		blas->createAndBuild(vk, device, cmdBuffer, allocator);
3049 		for (deUint32 i = 0; i < m_params->width; ++i)
3050 		{
3051 			bottoms.emplace_back(blas);
3052 		}
3053 	}
3054 	else if (TopTestType::DIFFERENT_INSTANCES == m_params->topTestType)
3055 	{
3056 		for (deUint32 i = 0; i < m_params->width; ++i)
3057 		{
3058 			auto blas = de::SharedPtr<BottomLevelAccelerationStructure>(makeBottomLevelAccelerationStructure().release());
3059 			blas->setBuildType(m_params->buildType);
3060 			blas->setGeometryData(geometryData, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
3061 			blas->createAndBuild(vk, device, cmdBuffer, allocator);
3062 			bottoms.emplace_back(blas);
3063 		}
3064 	}
3065 	else // TTT_MIX_INSTANCES == m_params->topTestType
3066 	{
3067 		for (deUint32 i = 0; i < m_params->width; ++i)
3068 		{
3069 			{
3070 				auto blas1 = de::SharedPtr<BottomLevelAccelerationStructure>(makeBottomLevelAccelerationStructure().release());
3071 				blas1->setBuildType(m_params->buildType);
3072 				blas1->setGeometryData(geometryData, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
3073 				blas1->createAndBuild(vk, device, cmdBuffer, allocator);
3074 				bottoms.emplace_back(blas1);
3075 			}
3076 
3077 			{
3078 				auto blas2 = de::SharedPtr<BottomLevelAccelerationStructure>(makeBottomLevelAccelerationStructure().release());
3079 				blas2->setBuildType(m_params->buildType);
3080 				blas2->setGeometryData(geometryData, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
3081 				blas2->createAndBuild(vk, device, cmdBuffer, allocator);
3082 				bottoms.emplace_back(blas2);
3083 			}
3084 		}
3085 
3086 	}
3087 
3088 	const std::size_t												instanceCount = bottoms.size();
3089 
3090 	de::MovePtr<TopLevelAccelerationStructure>						tlas = makeTopLevelAccelerationStructure();
3091 	tlas->setBuildType(m_params->buildType);
3092 	tlas->setInstanceCount(instanceCount);
3093 
3094 	for (std::size_t i = 0; i < instanceCount; ++i)
3095 	{
3096 		const VkTransformMatrixKHR	transformMatrixKHR =
3097 		{
3098 			{	//  float	matrix[3][4];
3099 				{ 1.0f, 0.0f, 0.0f, (float)i },
3100 				{ 0.0f, 1.0f, 0.0f, (float)i },
3101 				{ 0.0f, 0.0f, 1.0f, 0.0f },
3102 			}
3103 		};
3104 		tlas->addInstance(bottoms[i], transformMatrixKHR, 0, m_params->cullMask, 0u, getCullFlags((m_params->cullFlags)));
3105 	}
3106 
3107 	tlas->createAndBuild(vk, device, cmdBuffer, allocator);
3108 
3109 	return de::SharedPtr<TopLevelAccelerationStructure>(tlas.release());
3110 }
3111 
iterate(void)3112 tcu::TestStatus RayTracingHeaderBottomAddressTestInstance::iterate (void)
3113 {
3114 	const DeviceInterface&								vkd				= m_context.getDeviceInterface();
3115 	const VkDevice										device			= m_context.getDevice();
3116 	const deUint32										familyIndex		= m_context.getUniversalQueueFamilyIndex();
3117 	const VkQueue										queue			= m_context.getUniversalQueue();
3118 	Allocator&											allocator		= m_context.getDefaultAllocator();
3119 
3120 	const Move<VkCommandPool>							cmdPool			= createCommandPool(vkd, device, vk::VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, familyIndex);
3121 	const Move<VkCommandBuffer>							cmdBuffer		= allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3122 
3123 	beginCommandBuffer(vkd, *cmdBuffer, 0);
3124 	de::SharedPtr<TopLevelAccelerationStructure>		src				= prepareTopAccelerationStructure(vkd, device, allocator, *cmdBuffer);
3125 	endCommandBuffer(vkd, *cmdBuffer);
3126 	submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
3127 
3128 	de::MovePtr<TopLevelAccelerationStructure>			dst				= makeTopLevelAccelerationStructure();
3129 
3130 	const std::vector<deUint64>							inAddrs			= src->getSerializingAddresses(vkd, device);
3131 	const std::vector<VkDeviceSize>						inSizes			= src->getSerializingSizes(vkd, device, queue, familyIndex);
3132 
3133 	const SerialInfo									serialInfo		(inAddrs, inSizes);
3134 	SerialStorage										deepStorage		(vkd, device, allocator, m_params->buildType, serialInfo);
3135 
3136 	// make deep serialization - top-level AS width bottom-level structures that it owns
3137 	vkd.resetCommandBuffer(*cmdBuffer, 0);
3138 	beginCommandBuffer(vkd, *cmdBuffer, 0);
3139 	src->serialize(vkd, device, *cmdBuffer, &deepStorage);
3140 	endCommandBuffer(vkd, *cmdBuffer);
3141 	submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
3142 
3143 	// deserialize all from the previous step to a new top-level AS
3144 	// bottom-level structure addresses should be updated when deep data is deserialized
3145 	vkd.resetCommandBuffer(*cmdBuffer, 0);
3146 	beginCommandBuffer(vkd, *cmdBuffer, 0);
3147 	dst->createAndDeserializeFrom(vkd, device, *cmdBuffer, allocator, &deepStorage);
3148 	endCommandBuffer(vkd, *cmdBuffer);
3149 	submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
3150 
3151 	SerialStorage										shallowStorage	(vkd, device, allocator, m_params->buildType, inSizes[0]);
3152 
3153 	// make shallow serialization - only top-level AS without bottom-level structures
3154 	vkd.resetCommandBuffer(*cmdBuffer, 0);
3155 	beginCommandBuffer(vkd, *cmdBuffer, 0);
3156 	dst->serialize(vkd, device, *cmdBuffer, &shallowStorage);
3157 	endCommandBuffer(vkd, *cmdBuffer);
3158 	submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
3159 
3160 	// get data to verification
3161 	const std::vector<deUint64>							outAddrs		= dst->getSerializingAddresses(vkd, device);
3162 	const SerialStorage::AccelerationStructureHeader*	header			= shallowStorage.getASHeader();
3163 
3164 	return (areAddressesDifferent(inAddrs, outAddrs) && areAddressesTheSame(outAddrs, header)) ? tcu::TestStatus::pass("") : tcu::TestStatus::fail("");
3165 }
3166 
areAddressesTheSame(const std::vector<deUint64> & addresses,const SerialStorage::AccelerationStructureHeader * header)3167 bool RayTracingHeaderBottomAddressTestInstance::areAddressesTheSame (const std::vector<deUint64>& addresses, const SerialStorage::AccelerationStructureHeader* header)
3168 {
3169 	const deUint32 cbottoms = deUint32(addresses.size() - 1);
3170 
3171 	// header should contain the same number of handles as serialized/deserialized top-level AS
3172 	if (cbottoms != header->handleCount) return false;
3173 
3174 	std::set<deUint64> refAddrs;
3175 	std::set<deUint64> checkAddrs;
3176 
3177 	// distinct, squach and sort address list
3178 	for (deUint32 i = 0; i < cbottoms; ++i)
3179 	{
3180 		refAddrs.insert(addresses[i+1]);
3181 		checkAddrs.insert(header->handleArray[i]);
3182 	}
3183 
3184 	return std::equal(refAddrs.begin(), refAddrs.end(), checkAddrs.begin());
3185 }
3186 
areAddressesDifferent(const std::vector<deUint64> & addresses1,const std::vector<deUint64> & addresses2)3187 bool RayTracingHeaderBottomAddressTestInstance::areAddressesDifferent (const std::vector<deUint64>& addresses1, const std::vector<deUint64>& addresses2)
3188 {
3189 	// the number of addresses must be equal
3190 	if (addresses1.size() != addresses2.size())
3191 		return false;
3192 
3193 	// adresses of top-level AS must differ
3194 	if (addresses1[0] == addresses2[0])
3195 		return false;
3196 
3197 	std::set<deUint64>	addrs1;
3198 	std::set<deUint64>	addrs2;
3199 	deUint32			matches		= 0;
3200 	const deUint32		cbottoms	= deUint32(addresses1.size() - 1);
3201 
3202 	for (deUint32 i = 0; i < cbottoms; ++i)
3203 	{
3204 		addrs1.insert(addresses1[i+1]);
3205 		addrs2.insert(addresses2[i+1]);
3206 	}
3207 
3208 	// the first addresses set must not contain any address from the second addresses set
3209 	for (auto& addr1 : addrs1)
3210 	{
3211 		if (addrs2.end() != addrs2.find(addr1))
3212 			++matches;
3213 	}
3214 
3215 	return (matches == 0);
3216 }
3217 
3218 template<class X, class... Y>
makeShared(Y &&...ctorArgs)3219 inline de::SharedPtr<X> makeShared(Y&&... ctorArgs) {
3220 	return de::SharedPtr<X>(new X(std::forward<Y>(ctorArgs)...));
3221 }
3222 template<class X, class... Y>
makeMovePtr(Y &&...ctorArgs)3223 inline de::MovePtr<X> makeMovePtr(Y&&... ctorArgs) {
3224 	return de::MovePtr<X>(new X(std::forward<Y>(ctorArgs)...));
3225 }
3226 template<class X>
makeSharedFrom(const X & x)3227 inline de::SharedPtr<X> makeSharedFrom(const X& x) {
3228 	return makeShared<X>(x);
3229 }
3230 
3231 struct QueryPoolResultsParams
3232 {
3233 	enum class Type
3234 	{
3235 		StructureSize,
3236 		PointerCount
3237 	}									queryType;
3238 	VkAccelerationStructureBuildTypeKHR	buildType;
3239 	deUint32							blasCount;
3240 	bool								inVkBuffer;
3241 	bool								compacted;
3242 };
3243 
3244 typedef de::SharedPtr<const QueryPoolResultsParams> QueryPoolResultsParamsPtr;
3245 
3246 struct ASInterface;
3247 typedef de::SharedPtr<ASInterface> ASInterfacePtr;
3248 
3249 class QueryPoolResultsInstance : public TestInstance
3250 {
3251 public:
3252 	using TlasPtr = de::SharedPtr<TopLevelAccelerationStructure>;
3253 	using BlasPtr = de::SharedPtr<BottomLevelAccelerationStructure>;
3254 
QueryPoolResultsInstance(Context & context,QueryPoolResultsParamsPtr params)3255 				QueryPoolResultsInstance	(Context&						context,
3256 											 QueryPoolResultsParamsPtr		params)
3257 					: TestInstance	(context)
3258 					, m_params		(params) {}
3259 	auto		prepareBottomAccStructures	(const DeviceInterface&			vk,
3260 											 VkDevice						device,
3261 											 Allocator&						allocator,
3262 											 VkCommandBuffer				cmdBuffer) ->std::vector<BlasPtr>;
3263 	TlasPtr		prepareTopAccStructure		(const DeviceInterface&			vk,
3264 											 VkDevice						device,
3265 											 Allocator&						allocator,
3266 											 VkCommandBuffer				cmdBuffer,
3267 											 const std::vector<BlasPtr>&	bottoms);
3268 protected:
3269 	const QueryPoolResultsParamsPtr	m_params;
3270 };
3271 
3272 struct ASInterface
3273 {
3274 	virtual VkAccelerationStructureKHR getPtr() const = 0;
3275 	virtual VkAccelerationStructureBuildSizesInfoKHR getStructureBuildSizes() const = 0;
3276 	virtual ASInterfacePtr clone (Context& ctx, VkAccelerationStructureBuildTypeKHR buildType, const VkCommandBuffer cmd, VkDeviceSize size) = 0;
3277 };
3278 
3279 template<class> struct ASAllocator;
3280 template<> struct ASAllocator<QueryPoolResultsInstance::TlasPtr>
3281 {
3282 	typedef QueryPoolResultsInstance::TlasPtr TlasPtr;
allocvkt::RayTracing::__anon31ef80ca0111::ASAllocator3283 	static TlasPtr alloc() { return TlasPtr(makeTopLevelAccelerationStructure().release()); }
3284 };
3285 template<> struct ASAllocator<QueryPoolResultsInstance::BlasPtr>
3286 {
3287 	typedef QueryPoolResultsInstance::BlasPtr BlasPtr;
allocvkt::RayTracing::__anon31ef80ca0111::ASAllocator3288 	static BlasPtr alloc() { return BlasPtr(makeBottomLevelAccelerationStructure().release()); }
3289 };
3290 
3291 template<class SharedPtrType> struct ASInterfaceImpl : ASInterface
3292 {
3293 	SharedPtrType	m_source;
ASInterfaceImplvkt::RayTracing::__anon31ef80ca0111::ASInterfaceImpl3294 	ASInterfaceImpl (SharedPtrType src) : m_source(src) {}
getPtrvkt::RayTracing::__anon31ef80ca0111::ASInterfaceImpl3295 	virtual VkAccelerationStructureKHR getPtr() const override
3296 	{
3297 		return *m_source->getPtr();
3298 	}
getStructureBuildSizesvkt::RayTracing::__anon31ef80ca0111::ASInterfaceImpl3299 	virtual VkAccelerationStructureBuildSizesInfoKHR getStructureBuildSizes() const override
3300 	{
3301 		return m_source->getStructureBuildSizes();
3302 	}
clonevkt::RayTracing::__anon31ef80ca0111::ASInterfaceImpl3303 	virtual ASInterfacePtr clone (Context& ctx, VkAccelerationStructureBuildTypeKHR buildType, const VkCommandBuffer cmd, VkDeviceSize size) override
3304 	{
3305 		const DeviceInterface&	vk				= ctx.getDeviceInterface();
3306 		const VkDevice			device			= ctx.getDevice();
3307 		Allocator&				allocator		= ctx.getDefaultAllocator();
3308 
3309 		auto ptr = ASAllocator<SharedPtrType>::alloc();
3310 		ptr->setBuildType(buildType);
3311 		ptr->setBuildFlags(m_source->getBuildFlags());
3312 		ptr->create(vk, device, allocator, size);
3313 		ptr->copyFrom(vk, device, cmd, m_source.get(), false);
3314 		return de::SharedPtr<ASInterface>(new ASInterfaceImpl(ptr));
3315 	}
3316 };
3317 
makeASInterfacePtr(SharedPtrType asPtr)3318 template<class SharedPtrType> ASInterfacePtr makeASInterfacePtr (SharedPtrType asPtr)
3319 {
3320 	return ASInterfacePtr(new ASInterfaceImpl<SharedPtrType>(asPtr));
3321 }
3322 
3323 class QueryPoolResultsSizeInstance : public QueryPoolResultsInstance
3324 {
3325 public:
QueryPoolResultsSizeInstance(Context & context,QueryPoolResultsParamsPtr params)3326 				QueryPoolResultsSizeInstance	(Context&										context,
3327 												 QueryPoolResultsParamsPtr						params)
3328 					: QueryPoolResultsInstance	(context, params) {}
3329 	TestStatus	iterate							(void) override;
3330 	auto		makeCopyOfStructures			(const std::vector<ASInterfacePtr>&				structs,
3331 												 const std::vector<VkDeviceSize>				sizes) -> std::vector<ASInterfacePtr>;
3332 	auto		getStructureSizes				(const std::vector<VkAccelerationStructureKHR>&	handles) -> std::vector<VkDeviceSize>;
3333 };
3334 
3335 class QueryPoolResultsPointersInstance : public QueryPoolResultsInstance
3336 {
3337 public:
QueryPoolResultsPointersInstance(Context & context,QueryPoolResultsParamsPtr params)3338 				QueryPoolResultsPointersInstance (Context& context, QueryPoolResultsParamsPtr params)
3339 					: QueryPoolResultsInstance(context, params) {}
3340 
3341 	TestStatus	iterate							  (void) override;
3342 };
3343 
3344 class QueryPoolResultsCase : public TestCase
3345 {
3346 public:
QueryPoolResultsCase(TestContext & ctx,const char * name,QueryPoolResultsParamsPtr params)3347 					QueryPoolResultsCase	(TestContext&				ctx,
3348 											 const char*				name,
3349 											 QueryPoolResultsParamsPtr	params)
3350 						: TestCase(ctx, name)
3351 						, m_params(params) {}
3352 	void			checkSupport			(Context&					context) const override;
3353 	TestInstance*	createInstance			(Context&					context) const override;
3354 
3355 	template<class T, class P = T(*)[1], class R = decltype(std::begin(*std::declval<P>()))>
makeStdBeginEnd(void * p,deUint32 n)3356 	static auto makeStdBeginEnd(void* p, deUint32 n) -> std::pair<R, R>
3357 	{
3358 		auto tmp = std::begin(*P(p));
3359 		auto begin = tmp;
3360 		std::advance(tmp, n);
3361 		return { begin, tmp };
3362 	}
3363 
3364 private:
3365 	const QueryPoolResultsParamsPtr	m_params;
3366 };
3367 
createInstance(Context & context) const3368 TestInstance* QueryPoolResultsCase::createInstance (Context& context) const
3369 {
3370 	switch (m_params->queryType)
3371 	{
3372 		case QueryPoolResultsParams::Type::StructureSize:	return new QueryPoolResultsSizeInstance(context, m_params);
3373 		case QueryPoolResultsParams::Type::PointerCount:	return new QueryPoolResultsPointersInstance(context, m_params);
3374 	}
3375 	TCU_THROW(InternalError, "Unknown test type");
3376 	return nullptr;
3377 }
3378 
checkSupport(Context & context) const3379 void QueryPoolResultsCase::checkSupport (Context& context) const
3380 {
3381 	context.requireDeviceFunctionality(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME);
3382 	context.requireDeviceFunctionality(VK_KHR_RAY_TRACING_MAINTENANCE_1_EXTENSION_NAME);
3383 
3384 	const VkPhysicalDeviceAccelerationStructureFeaturesKHR&	accelerationStructureFeaturesKHR = context.getAccelerationStructureFeatures();
3385 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR && accelerationStructureFeaturesKHR.accelerationStructureHostCommands == DE_FALSE)
3386 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructureHostCommands");
3387 
3388 	const VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR& maintenance1FeaturesKHR = context.getRayTracingMaintenance1Features();
3389 	if (maintenance1FeaturesKHR.rayTracingMaintenance1 == VK_FALSE)
3390 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR::rayTracingMaintenance1");
3391 }
3392 
prepareBottomAccStructures(const DeviceInterface & vk,VkDevice device,Allocator & allocator,VkCommandBuffer cmdBuffer)3393 auto QueryPoolResultsInstance::prepareBottomAccStructures (const DeviceInterface&	vk,
3394 														   VkDevice					device,
3395 														   Allocator&				allocator,
3396 														   VkCommandBuffer			cmdBuffer) -> std::vector<BlasPtr>
3397 {
3398 	std::vector<Vec3>		triangle		=
3399 	{
3400 		{ 0.0, 0.0, 0.0 },
3401 		{ 0.5, 0.0, 0.0 },
3402 		{ 0.0, 0.5, 0.0 },
3403 	};
3404 
3405 	const deUint32			triangleCount	= ((1 + m_params->blasCount) * m_params->blasCount) / 2;
3406 	const float				angle			= (4.0f * std::acos(0.0f)) / float(triangleCount);
3407 	auto					rotateCcwZ		= [&](const Vec3& p, const Vec3& center) -> tcu::Vec3
3408 	{
3409 		const float s = std::sin(angle);
3410 		const float c = std::cos(angle);
3411 		const auto  t = p - center;
3412 		return tcu::Vec3(c * t.x() - s * t.y(), s * t.x() + c * t.y(), t.z()) + center;
3413 	};
3414 	auto					nextGeometry	= [&]() -> void
3415 	{
3416 		for (auto& vertex : triangle)
3417 			vertex = rotateCcwZ(vertex, Vec3(0.0f, 0.0f, 0.0f));
3418 	};
3419 
3420 	std::vector<BlasPtr>	bottoms			(m_params->blasCount);
3421 
3422 	for (deUint32 b = 0; b < m_params->blasCount; ++b)
3423 	{
3424 		BlasPtr blas(makeBottomLevelAccelerationStructure().release());
3425 
3426 		blas->setBuildType(m_params->buildType);
3427 		if (m_params->compacted)
3428 		{
3429 			blas->setBuildFlags(VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR);
3430 		}
3431 		blas->addGeometry(triangle, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
3432 		for (deUint32 geom = b; geom < m_params->blasCount; ++geom)
3433 		{
3434 			nextGeometry();
3435 			blas->addGeometry(triangle, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
3436 		}
3437 
3438 		blas->createAndBuild(vk, device, cmdBuffer, allocator);
3439 
3440 		bottoms[b] = blas;
3441 	}
3442 
3443 	return bottoms;
3444 }
3445 
prepareTopAccStructure(const DeviceInterface & vk,VkDevice device,Allocator & allocator,VkCommandBuffer cmdBuffer,const std::vector<BlasPtr> & bottoms)3446 auto QueryPoolResultsInstance::prepareTopAccStructure (const DeviceInterface&		vk,
3447 													   VkDevice						device,
3448 													   Allocator&					allocator,
3449 													   VkCommandBuffer				cmdBuffer,
3450 													   const std::vector<BlasPtr>&	bottoms) -> TlasPtr
3451 {
3452 	const std::size_t	instanceCount = bottoms.size();
3453 
3454 	de::MovePtr<TopLevelAccelerationStructure>	tlas = makeTopLevelAccelerationStructure();
3455 	tlas->setBuildType(m_params->buildType);
3456 	if (m_params->compacted)
3457 	{
3458 		tlas->setBuildFlags(VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR);
3459 	}
3460 	tlas->setInstanceCount(instanceCount);
3461 
3462 	for (std::size_t i = 0; i < instanceCount; ++i)
3463 	{
3464 		tlas->addInstance(bottoms[i], identityMatrix3x4, 0, 0xFFu, 0u, VkGeometryInstanceFlagsKHR(0));
3465 	}
3466 
3467 	tlas->createAndBuild(vk, device, cmdBuffer, allocator);
3468 
3469 	return TlasPtr(tlas.release());
3470 }
3471 
getStructureSizes(const std::vector<VkAccelerationStructureKHR> & handles)3472 auto QueryPoolResultsSizeInstance::getStructureSizes (const std::vector<VkAccelerationStructureKHR>&	handles) -> std::vector<VkDeviceSize>
3473 {
3474 	const DeviceInterface&				vk				= m_context.getDeviceInterface();
3475 	const VkDevice						device			= m_context.getDevice();
3476 	const deUint32						familyIndex		= m_context.getUniversalQueueFamilyIndex();
3477 	const VkQueue						queue			= m_context.getUniversalQueue();
3478 	Allocator&							allocator		= m_context.getDefaultAllocator();
3479 
3480 	const Move<VkCommandPool>			cmdPool			= createCommandPool(vk, device, 0, familyIndex);
3481 	const Move<VkCommandBuffer>			cmdBuffer		= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3482 
3483 	const deUint32						queryCount		= static_cast<deUint32>(handles.size());
3484 
3485 	Move<VkQueryPool>					queryPoolSize	= makeQueryPool(vk, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR, queryCount);
3486 	Move<VkQueryPool>					queryPoolSerial	= makeQueryPool(vk, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, queryCount);
3487 	Move<VkQueryPool>					queryPoolCompact= m_params->compacted
3488 											? makeQueryPool(vk, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, queryCount)
3489 											: Move<VkQueryPool>();
3490 
3491 	de::MovePtr<BufferWithMemory>		buffer;
3492 	std::vector<VkDeviceSize>			sizeSizes		(queryCount, 0);
3493 	std::vector<VkDeviceSize>			serialSizes		(queryCount, 0);
3494 	std::vector<VkDeviceSize>			compactSizes	(queryCount, 0);
3495 
3496 	if (m_params->inVkBuffer)
3497 	{
3498 		const auto vci = makeBufferCreateInfo((m_params->compacted ? 3 : 2) * queryCount * sizeof(VkDeviceSize), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3499 		buffer = makeMovePtr<BufferWithMemory>(vk, device, allocator, vci, MemoryRequirement::Coherent | MemoryRequirement::HostVisible);
3500 	}
3501 
3502 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3503 	{
3504 		beginCommandBuffer(vk, *cmdBuffer, 0);
3505 
3506 		vk.cmdResetQueryPool(*cmdBuffer, *queryPoolSize, 0, queryCount);
3507 		vk.cmdResetQueryPool(*cmdBuffer, *queryPoolSerial, 0, queryCount);
3508 		if (m_params->compacted)
3509 		{
3510 			vk.cmdResetQueryPool(*cmdBuffer, *queryPoolCompact, 0, queryCount);
3511 		}
3512 
3513 		vk.cmdWriteAccelerationStructuresPropertiesKHR(*cmdBuffer, queryCount, handles.data(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR, *queryPoolSize, 0);
3514 		vk.cmdWriteAccelerationStructuresPropertiesKHR(*cmdBuffer, queryCount, handles.data(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, *queryPoolSerial, 0);
3515 
3516 		if (m_params->compacted)
3517 		{
3518 			vk.cmdWriteAccelerationStructuresPropertiesKHR(*cmdBuffer, queryCount, handles.data(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, *queryPoolCompact, 0);
3519 		}
3520 
3521 		if (m_params->inVkBuffer)
3522 		{
3523 			vk.cmdCopyQueryPoolResults(*cmdBuffer, *queryPoolSize, 0, queryCount, **buffer, (0 * queryCount * sizeof(VkDeviceSize)),
3524 									   sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
3525 			vk.cmdCopyQueryPoolResults(*cmdBuffer, *queryPoolSerial, 0, queryCount, **buffer, (1 * queryCount * sizeof(VkDeviceSize)),
3526 									   sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
3527 			if (m_params->compacted)
3528 			{
3529 				vk.cmdCopyQueryPoolResults(*cmdBuffer, *queryPoolCompact, 0, queryCount, **buffer, (2 * queryCount * sizeof(VkDeviceSize)),
3530 											sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
3531 			}
3532 		}
3533 		endCommandBuffer(vk, *cmdBuffer);
3534 		submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3535 
3536 		if (m_params->inVkBuffer)
3537 		{
3538 			Allocation&	alloc		= buffer->getAllocation();
3539 			invalidateMappedMemoryRange(vk, device, alloc.getMemory(), alloc.getOffset(), VK_WHOLE_SIZE);
3540 
3541 			deUint8*	ptrSize		= reinterpret_cast<deUint8*>(alloc.getHostPtr());
3542 			deUint8*	ptrSerial	= ptrSize + queryCount * sizeof(VkDeviceSize);
3543 
3544 			auto		rangeSize	= QueryPoolResultsCase::makeStdBeginEnd<VkDeviceSize>(ptrSize, queryCount);
3545 			auto		rangeSerial	= QueryPoolResultsCase::makeStdBeginEnd<VkDeviceSize>(ptrSerial, queryCount);
3546 
3547 			std::copy_n(rangeSize.first, queryCount, sizeSizes.begin());
3548 			std::copy_n(rangeSerial.first, queryCount, serialSizes.begin());
3549 
3550 			if (m_params->compacted)
3551 			{
3552 				auto	ptrCompact	= ptrSize + 2 * queryCount * sizeof(VkDeviceSize);
3553 				auto	rangeCompact= QueryPoolResultsCase::makeStdBeginEnd<VkDeviceSize>(ptrCompact, queryCount);
3554 				std::copy_n(rangeCompact.first, queryCount, compactSizes.begin());
3555 			}
3556 		}
3557 		else
3558 		{
3559 			VK_CHECK(vk.getQueryPoolResults(device, *queryPoolSize, 0u, queryCount, queryCount * sizeof(VkDeviceSize),
3560 											sizeSizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
3561 			VK_CHECK(vk.getQueryPoolResults(device, *queryPoolSerial, 0u, queryCount, queryCount * sizeof(VkDeviceSize),
3562 											serialSizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
3563 			if (m_params->compacted)
3564 			{
3565 				VK_CHECK(vk.getQueryPoolResults(device, *queryPoolCompact, 0u, queryCount, queryCount * sizeof(VkDeviceSize),
3566 												compactSizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
3567 			}
3568 		}
3569 	}
3570 	else
3571 	{
3572 		vk.writeAccelerationStructuresPropertiesKHR(device, queryCount, handles.data(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR,
3573 													queryCount * sizeof(VkDeviceSize), sizeSizes.data(), sizeof(VkDeviceSize));
3574 		vk.writeAccelerationStructuresPropertiesKHR(device, queryCount, handles.data(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR,
3575 													queryCount * sizeof(VkDeviceSize), serialSizes.data(), sizeof(VkDeviceSize));
3576 		if (m_params->compacted)
3577 		{
3578 			vk.writeAccelerationStructuresPropertiesKHR(device, queryCount, handles.data(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR,
3579 													queryCount * sizeof(VkDeviceSize), compactSizes.data(), sizeof(VkDeviceSize));
3580 		}
3581 	}
3582 
3583 	sizeSizes.insert(sizeSizes.end(), serialSizes.begin(), serialSizes.end());
3584 	sizeSizes.insert(sizeSizes.end(), compactSizes.begin(), compactSizes.end());
3585 
3586 	return sizeSizes;
3587 }
3588 
makeCopyOfStructures(const std::vector<ASInterfacePtr> & structs,const std::vector<VkDeviceSize> sizes)3589 auto QueryPoolResultsSizeInstance::makeCopyOfStructures (const std::vector<ASInterfacePtr>&	structs,
3590 														 const std::vector<VkDeviceSize>	sizes) -> std::vector<ASInterfacePtr>
3591 {
3592 	const DeviceInterface&				vk				= m_context.getDeviceInterface();
3593 	const VkDevice						device			= m_context.getDevice();
3594 	const VkQueue						queue			= m_context.getUniversalQueue();
3595 
3596 	Move<VkCommandPool>					cmdPool;
3597 	Move<VkCommandBuffer>				cmdBuffer;
3598 
3599 	std::vector<ASInterfacePtr>			copies;
3600 
3601 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3602 	{
3603 		const deUint32	familyIndex	= m_context.getUniversalQueueFamilyIndex();
3604 						cmdPool		= createCommandPool(vk, device, 0, familyIndex);
3605 						cmdBuffer	= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3606 		beginCommandBuffer(vk, *cmdBuffer, 0u);
3607 	}
3608 
3609 	for (auto begin = structs.begin(), i = begin; i != structs.end(); ++i)
3610 	{
3611 		copies.push_back((*i)->clone(m_context, m_params->buildType, *cmdBuffer, sizes.at(std::distance(begin, i))));
3612 	}
3613 
3614 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3615 	{
3616 		endCommandBuffer(vk, *cmdBuffer);
3617 		submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3618 	}
3619 
3620 	return copies;
3621 }
3622 
iterate(void)3623 TestStatus QueryPoolResultsSizeInstance::iterate (void)
3624 {
3625 	const DeviceInterface&								vk				= m_context.getDeviceInterface();
3626 	const VkDevice										device			= m_context.getDevice();
3627 	const deUint32										familyIndex		= m_context.getUniversalQueueFamilyIndex();
3628 	const VkQueue										queue			= m_context.getUniversalQueue();
3629 	Allocator&											allocator		= m_context.getDefaultAllocator();
3630 
3631 	const Move<VkCommandPool>							cmdPool			= createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, familyIndex);
3632 	const Move<VkCommandBuffer>							cmdBuffer		= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3633 
3634 	beginCommandBuffer(vk, *cmdBuffer, 0);
3635 	const std::vector<BlasPtr>							bottoms			= prepareBottomAccStructures(vk, device, allocator, *cmdBuffer);
3636 	TlasPtr												tlas			= prepareTopAccStructure(vk, device, allocator, *cmdBuffer, bottoms);
3637 	endCommandBuffer(vk, *cmdBuffer);
3638 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3639 
3640 	const deUint32										queryCount		= m_params->blasCount + 1;
3641 	std::vector<VkAccelerationStructureKHR>				handles			(queryCount);
3642 	handles[0] = *tlas->getPtr();
3643 	std::transform(bottoms.begin(), bottoms.end(), std::next(handles.begin()), [](const BlasPtr& blas){ return *blas->getPtr(); });
3644 
3645 	// only the first queryCount elements are results from ACCELERATION_STRUCTURE_SIZE queries.
3646 	const std::vector<VkDeviceSize>						sourceSizes		= getStructureSizes(handles);
3647 
3648 	std::vector<ASInterfacePtr>							sourceStructures;
3649 	sourceStructures.push_back(makeASInterfacePtr(tlas));
3650 	for (BlasPtr blas : bottoms) sourceStructures.push_back(makeASInterfacePtr(blas));
3651 
3652 	std::vector<ASInterfacePtr>							copies = makeCopyOfStructures(sourceStructures, sourceSizes);
3653 	std::transform(copies.begin(), copies.end(), handles.begin(), [](const ASInterfacePtr& intf) { return intf->getPtr(); });
3654 
3655 	const std::vector<VkDeviceSize>						copySizes = getStructureSizes(handles);
3656 
3657 	// verification
3658 	bool pass = true;
3659 	for (deUint32 i = 0; pass && i < queryCount; ++i)
3660 	{
3661 		pass = sourceSizes.at(i) == copySizes.at(i);
3662 	}
3663 
3664 	return pass ? TestStatus::pass("") : TestStatus::fail("");
3665 }
3666 
iterate(void)3667 TestStatus QueryPoolResultsPointersInstance::iterate (void)
3668 {
3669 	const DeviceInterface&								vk				= m_context.getDeviceInterface();
3670 	const VkDevice										device			= m_context.getDevice();
3671 	const deUint32										familyIndex		= m_context.getUniversalQueueFamilyIndex();
3672 	const VkQueue										queue			= m_context.getUniversalQueue();
3673 	Allocator&											allocator		= m_context.getDefaultAllocator();
3674 
3675 	const Move<VkCommandPool>							cmdPool			= createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, familyIndex);
3676 	const Move<VkCommandBuffer>							cmdBuffer		= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3677 
3678 	beginCommandBuffer(vk, *cmdBuffer, 0);
3679 	const std::vector<BlasPtr>							bottoms			= prepareBottomAccStructures(vk, device, allocator, *cmdBuffer);
3680 	TlasPtr												tlas			= prepareTopAccStructure(vk, device, allocator, *cmdBuffer, bottoms);
3681 	endCommandBuffer(vk, *cmdBuffer);
3682 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3683 
3684 	const deUint32										queryCount		= m_params->blasCount + 1;
3685 	std::vector<VkAccelerationStructureKHR>				handles			(queryCount);
3686 	handles[0] = *tlas.get()->getPtr();
3687 	std::transform(bottoms.begin(), bottoms.end(), std::next(handles.begin()), [](const BlasPtr& blas){ return *blas.get()->getPtr(); });
3688 
3689 	const VkQueryType									queryType		= VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR;
3690 	Move<VkQueryPool>									queryPoolCounts	= makeQueryPool(vk, device, queryType, queryCount);
3691 
3692 	de::MovePtr<BufferWithMemory>						buffer;
3693 	std::vector<VkDeviceSize>							pointerCounts	(queryCount, 123u);
3694 
3695 	if (m_params->inVkBuffer)
3696 	{
3697 		const auto vci = makeBufferCreateInfo(queryCount * sizeof(VkDeviceSize), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3698 		buffer = makeMovePtr<BufferWithMemory>(vk, device, allocator, vci, MemoryRequirement::Coherent | MemoryRequirement::HostVisible);
3699 	}
3700 
3701 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3702 	{
3703 		beginCommandBuffer(vk, *cmdBuffer, 0);
3704 		vk.cmdResetQueryPool(*cmdBuffer, *queryPoolCounts, 0, queryCount);
3705 		vk.cmdWriteAccelerationStructuresPropertiesKHR(*cmdBuffer, queryCount, handles.data(), queryType, *queryPoolCounts, 0);
3706 		if (m_params->inVkBuffer)
3707 		{
3708 			vk.cmdCopyQueryPoolResults(*cmdBuffer, *queryPoolCounts, 0, queryCount, **buffer, 0 /*offset*/,
3709 									   sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
3710 		}
3711 		endCommandBuffer(vk, *cmdBuffer);
3712 		submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3713 
3714 		if (m_params->inVkBuffer)
3715 		{
3716 			Allocation&	alloc		= buffer->getAllocation();
3717 			invalidateMappedMemoryRange(vk, device, alloc.getMemory(), alloc.getOffset(), VK_WHOLE_SIZE);
3718 			auto		rangeCounts	= QueryPoolResultsCase::makeStdBeginEnd<VkDeviceSize>(alloc.getHostPtr(), queryCount);
3719 			std::copy_n(rangeCounts.first, queryCount, pointerCounts.begin());
3720 		}
3721 		else
3722 		{
3723 			VK_CHECK(vk.getQueryPoolResults(device, *queryPoolCounts, 0u, queryCount, queryCount * sizeof(VkDeviceSize),
3724 											pointerCounts.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
3725 		}
3726 	}
3727 	else
3728 	{
3729 		vk.writeAccelerationStructuresPropertiesKHR(device, queryCount, handles.data(), queryType,
3730 													queryCount * sizeof(VkDeviceSize), pointerCounts.data(), sizeof(VkDeviceSize));
3731 	}
3732 
3733 	// verification
3734 	const std::vector<VkDeviceSize>						inSizes			= tlas->getSerializingSizes(vk, device, queue, familyIndex);
3735 	SerialStorage										storage			(vk, device, allocator, m_params->buildType, inSizes[0]);
3736 
3737 	beginCommandBuffer(vk, *cmdBuffer, 0);
3738 	tlas->serialize(vk, device, *cmdBuffer, &storage);
3739 	endCommandBuffer(vk, *cmdBuffer);
3740 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3741 
3742 	const SerialStorage::AccelerationStructureHeader*	header			= storage.getASHeader();
3743 
3744 	bool pass = (header->handleCount == pointerCounts[0]); // must be the same as bottoms.size()
3745 	for (deUint32 i = 1; pass && i < queryCount; ++i)
3746 	{
3747 		pass = (0 == pointerCounts[i]); // bottoms have no chidren
3748 	}
3749 
3750 	return pass ? TestStatus::pass("") : TestStatus::fail("");
3751 }
3752 
3753 
3754 struct CopyWithinPipelineParams
3755 {
3756 	enum class Type
3757 	{
3758 		StageASCopyBit,
3759 		StageAllTransferBit,
3760 		AccessSBTReadBit
3761 	}									type;
3762 	deUint32							width;
3763 	deUint32							height;
3764 	VkAccelerationStructureBuildTypeKHR	build;
3765 };
3766 typedef de::SharedPtr<const CopyWithinPipelineParams> CopyWithinPipelineParamsPtr;
3767 
3768 class CopyWithinPipelineInstance : public TestInstance
3769 {
3770 public:
3771 	using TlasPtr = de::SharedPtr<TopLevelAccelerationStructure>;
3772 	using BlasPtr = de::SharedPtr<BottomLevelAccelerationStructure>;
3773 
CopyWithinPipelineInstance(Context & context,CopyWithinPipelineParamsPtr params)3774 				CopyWithinPipelineInstance (Context& context, CopyWithinPipelineParamsPtr params)
3775 					: TestInstance	(context)
3776 					, vk			(context.getDeviceInterface())
3777 					, device		(context.getDevice())
3778 					, allocator		(context.getDefaultAllocator())
3779 					, rgenShader	(createShaderModule(vk, device, context.getBinaryCollection().get("rgen")))
3780 					, chitShader	(createShaderModule(vk, device, context.getBinaryCollection().get("chit")))
3781 					, missShader	(createShaderModule(vk, device, context.getBinaryCollection().get("miss")))
3782 					, m_params		(params)
3783 					, m_format		(VK_FORMAT_R32G32B32A32_SFLOAT) {}
3784 protected:
3785 	const DeviceInterface&		vk;
3786 	const VkDevice				device;
3787 	Allocator&					allocator;
3788 	Move<VkShaderModule>		rgenShader;
3789 	Move<VkShaderModule>		chitShader;
3790 	Move<VkShaderModule>		missShader;
3791 	CopyWithinPipelineParamsPtr	m_params;
3792 	VkFormat					m_format;
3793 };
3794 
3795 class CopyBlasInstance : public CopyWithinPipelineInstance
3796 {
3797 public:
CopyBlasInstance(Context & context,CopyWithinPipelineParamsPtr params)3798 				CopyBlasInstance	(Context& context, CopyWithinPipelineParamsPtr params)
3799 					: CopyWithinPipelineInstance(context, params) {}
3800 	TestStatus	iterate				(void) override;
3801 	auto		getRefImage			(BlasPtr blas) const -> de::MovePtr<BufferWithMemory>;
3802 
3803 };
3804 
3805 class CopySBTInstance : public CopyWithinPipelineInstance
3806 {
3807 public:
CopySBTInstance(Context & context,CopyWithinPipelineParamsPtr params)3808 				CopySBTInstance		(Context&			context,
3809 									 CopyWithinPipelineParamsPtr params)
3810 					: CopyWithinPipelineInstance(context, params) {}
3811 	TestStatus	iterate			(void) override;
3812 	auto		getBufferSizeForSBT	(const deUint32&	groupCount,
3813 									 const deUint32&	shaderGroupHandleSize,
3814 									 const deUint32&	shaderGroupBaseAlignment) const -> VkDeviceSize;
3815 	auto		getBufferForSBT		(const deUint32&	groupCount,
3816 									 const deUint32&	shaderGroupHandleSize,
3817 									 const deUint32&	shaderGroupBaseAlignment) const -> de::MovePtr<BufferWithMemory>;
3818 };
3819 
3820 class PipelineStageASCase : public TestCase
3821 {
3822 public:
PipelineStageASCase(TestContext & ctx,const char * name,CopyWithinPipelineParamsPtr params)3823 					PipelineStageASCase	(TestContext&			ctx,
3824 										 const char*			name,
3825 										 CopyWithinPipelineParamsPtr	params)
3826 						: TestCase	(ctx, name)
3827 						, m_params	(params) {}
3828 	void			initPrograms	(SourceCollections&		programs) const override;
3829 	void			checkSupport	(Context&				context) const override;
3830 	TestInstance*	createInstance	(Context&				context) const override;
3831 
3832 private:
3833 	CopyWithinPipelineParamsPtr	m_params;
3834 };
3835 
3836 namespace u
3837 {
namespace details
{
// Adapts one barrier argument to a (count, pointer) pair.
// Primary template: a single barrier object of type X, held by reference.
template<class X, class Y> struct BarrierMaker {
	const X& m_x;
	BarrierMaker (const X& x) : m_x(x) {}
	uint32_t count () const { return 1; }
	const X* pointer () const { return &m_x; }
};
// No barrier of type Y was supplied: zero count and a null pointer.
template<class Y> struct BarrierMaker<std::false_type, Y> {
	BarrierMaker (const std::false_type&) {}
	uint32_t count () const { return 0; }
	Y* pointer () const { return nullptr; }
};
// A C array of N barriers of type Z, held by reference.
template<class Z, uint32_t N> struct BarrierMaker<const Z[N], Z> {
	const Z (&m_a)[N];
	BarrierMaker (const Z (&a)[N]) : m_a(a) {}
	uint32_t count () const { return N; }
	const Z* pointer () const { return m_a; }
};
// Finds which of the three argument types Mem, Buf, Img carries barriers of type Exp.
//   index - position of the first matching argument (0, 1 or 2), or 3 when none matches
//   type  - the BarrierMaker to construct from that argument
template<class Mem, class Buf, class Img, class Exp>
struct Sel {
	typedef typename std::remove_cv<Mem>::type	t_Mem;
	typedef typename std::remove_cv<Buf>::type	t_Buf;
	typedef typename std::remove_cv<Img>::type	t_Img;
	typedef std::integral_constant<uint32_t, 0> index0;
	typedef std::integral_constant<uint32_t, 1> index1;
	typedef std::integral_constant<uint32_t, 2> index2;
	typedef std::integral_constant<uint32_t, 3> index3;
	// Element type of an argument: the array extent is stripped before the cv-qualifiers.
	// Without this an array of Exp never equals Exp and is silently treated as "no barriers".
	template<class T> using element = typename std::remove_cv<typename std::remove_extent<T>::type>::type;
	// Maker for an argument type: arrays are routed to the BarrierMaker<const Z[N], Z> specialization.
	template<class T> using maker = typename std::conditional<std::is_array<T>::value,
										BarrierMaker<typename std::add_const<T>::type, Exp>,
										BarrierMaker<T, Exp>>::type;
	using isMem = std::is_same<element<Mem>, Exp>;
	using isBuf = std::is_same<element<Buf>, Exp>;
	using isImg = std::is_same<element<Img>, Exp>;
	template<bool B, class T, class F> using choose = typename std::conditional<B,T,F>::type;
	typedef choose<isMem::value, maker<Mem>,
			choose<isBuf::value, maker<Buf>,
			choose<isImg::value, maker<Img>,
								 BarrierMaker<std::false_type, Exp>>>> type;
	typedef choose<isMem::value, index0,
			choose<isBuf::value, index1,
			choose<isImg::value, index2,
								 index3>>> index;
};
} // details
3880 constexpr std::false_type NoneBarriers{};
3881 /**
3882  * @brief	Helper function that makes and populates VkDependencyInfoKHR structure.
3883  * @param	barriers1 - any of VkMemoryBarrier2KHR, VkBufferMemoryBarrier2KHR or VkImageMemoryBarrier2KHR (mandatory param)
3884  * @param	barriers2 - any of VkMemoryBarrier2KHR, VkBufferMemoryBarrier2KHR or VkImageMemoryBarrier2KHR (optional param)
3885  * @param	barriers2 - any of VkMemoryBarrier2KHR, VkBufferMemoryBarrier2KHR or VkImageMemoryBarrier2KHR (optional param)
3886  * @note	The order of the parameters does not matter.
3887  */
3888 template<class Barriers1, class Barriers2 = std::false_type, class Barriers3 = std::false_type>
makeDependency(const Barriers1 & barriers1,const Barriers2 & barriers2=NoneBarriers,const Barriers3 & barriers3=NoneBarriers)3889 VkDependencyInfoKHR makeDependency (const Barriers1& barriers1, const Barriers2& barriers2 = NoneBarriers, const Barriers3& barriers3 = NoneBarriers)
3890 {
3891 	auto args = std::forward_as_tuple(barriers1, barriers2, barriers3, std::false_type());
3892 	const uint32_t memIndex = details::Sel<Barriers1, Barriers2, Barriers3, VkMemoryBarrier2KHR>::index::value;
3893 	const uint32_t bufIndex = details::Sel<Barriers1, Barriers2, Barriers3, VkBufferMemoryBarrier2KHR>::index::value;
3894 	const uint32_t imgIndex = details::Sel<Barriers1, Barriers2, Barriers3, VkImageMemoryBarrier2KHR>::index::value;
3895 	typedef typename details::Sel<Barriers1, Barriers2, Barriers3, VkMemoryBarrier2KHR>::type		memType;
3896 	typedef typename details::Sel<Barriers1, Barriers2, Barriers3, VkBufferMemoryBarrier2KHR>::type	bufType;
3897 	typedef typename details::Sel<Barriers1, Barriers2, Barriers3, VkImageMemoryBarrier2KHR>::type	imgType;
3898 	return
3899 	{
3900 		VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,			// VkStructureType					sType;
3901 		nullptr,										// const void*						pNext;
3902 		VK_DEPENDENCY_BY_REGION_BIT,					// VkDependencyFlags				dependencyFlags;
3903 		memType(std::get<memIndex>(args)).count(),		// uint32_t							memoryBarrierCount;
3904 		memType(std::get<memIndex>(args)).pointer(),	// const VkMemoryBarrier2KHR*		pMemoryBarriers;
3905 		bufType(std::get<bufIndex>(args)).count(),		// uint32_t							bufferMemoryBarrierCount;
3906 		bufType(std::get<bufIndex>(args)).pointer(),	// const VkBufferMemoryBarrier2KHR*	pBufferMemoryBarriers;
3907 		imgType(std::get<imgIndex>(args)).count(),		// uint32_t							imageMemoryBarrierCount;
3908 		imgType(std::get<imgIndex>(args)).pointer()		// const VkImageMemoryBarrier2KHR*	pImageMemoryBarriers;
3909 	};
3910 }
3911 } // u
3912 
createInstance(Context & context) const3913 TestInstance* PipelineStageASCase::createInstance (Context& context) const
3914 {
3915 	de::MovePtr<TestInstance> instance;
3916 	switch (m_params->type)
3917 	{
3918 	case CopyWithinPipelineParams::Type::StageASCopyBit:
3919 	case CopyWithinPipelineParams::Type::StageAllTransferBit:
3920 		instance = makeMovePtr<CopyBlasInstance>(context, m_params);
3921 		break;
3922 	case CopyWithinPipelineParams::Type::AccessSBTReadBit:
3923 		instance = makeMovePtr<CopySBTInstance>(context, m_params);
3924 		break;
3925 	}
3926 	return instance.release();
3927 }
3928 
initPrograms(SourceCollections & programs) const3929 void PipelineStageASCase::initPrograms (SourceCollections& programs) const
3930 {
3931 	const vk::ShaderBuildOptions	buildOptions	(programs.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
3932 	const char						endl			= '\n';
3933 
3934 	{
3935 		std::stringstream str;
3936 		str << "#version 460 core"																		<< endl
3937 			<< "#extension GL_EXT_ray_tracing : require"												<< endl
3938 			<< "layout(location = 0) rayPayloadEXT vec4 payload;"										<< endl
3939 			<< "layout(rgba32f, set = 0, binding = 0) uniform image2D result;"							<< endl
3940 			<< "layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;"				<< endl
3941 			<< "void main()"																			<< endl
3942 			<< "{"																						<< endl
3943 			<< "  float rx           = (float(gl_LaunchIDEXT.x) + 0.5) / float(gl_LaunchSizeEXT.x);"	<< endl
3944 			<< "  float ry           = (float(gl_LaunchIDEXT.y) + 0.5) / float(gl_LaunchSizeEXT.y);"	<< endl
3945 			<< "  payload            = vec4(0.5, 0.5, 0.5, 1.0);"										<< endl
3946 			<< "  vec3  orig         = vec3(rx, ry, 1.0);"												<< endl
3947 			<< "  vec3  dir          = vec3(0.0, 0.0, -1.0);"											<< endl
3948 			<< "  traceRayEXT(topLevelAS, gl_RayFlagsNoneEXT, 0xFFu, 0, 0, 0, orig, 0.0, dir, 2.0, 0);"	<< endl
3949 			<< "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), payload);"								<< endl
3950 			<< "}";
3951 		str.flush();
3952 		programs.glslSources.add("rgen") << glu::RaygenSource(str.str()) << buildOptions;
3953 	}
3954 
3955 	{
3956 		std::stringstream str;
3957 		str << "#version 460 core"									<< endl
3958 			<< "#extension GL_EXT_ray_tracing : require"			<< endl
3959 			<< "layout(location = 0) rayPayloadInEXT vec4 payload;"	<< endl
3960 			<< "void main()"										<< endl
3961 			<< "{"													<< endl
3962 			<< "  payload = vec4(0.0, 1.0, 0.0, 1.0);"				<< endl
3963 			<< "}";
3964 		str.flush();
3965 		programs.glslSources.add("chit") << glu::ClosestHitSource(str.str()) << buildOptions;
3966 	}
3967 
3968 	{
3969 		std::stringstream str;
3970 		str	<< "#version 460 core"									<< endl
3971 			<< "#extension GL_EXT_ray_tracing : require"			<< endl
3972 			<< "layout(location = 0) rayPayloadInEXT vec4 payload;"	<< endl
3973 			<< "void main()"										<< endl
3974 			<< "{"													<< endl
3975 			<< "  payload = vec4(1.0, 0.0, 0.0, 1.0);"				<< endl
3976 			<< "}";
3977 		str.flush();
3978 		programs.glslSources.add("miss") << glu::MissSource(str.str()) << buildOptions;
3979 	}
3980 }
3981 
checkSupport(Context & context) const3982 void PipelineStageASCase::checkSupport (Context& context) const
3983 {
3984 	context.requireInstanceFunctionality(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
3985 	context.requireDeviceFunctionality(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME);
3986 	context.requireDeviceFunctionality(VK_KHR_RAY_TRACING_MAINTENANCE_1_EXTENSION_NAME);
3987 	context.requireDeviceFunctionality(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME);
3988 	context.requireDeviceFunctionality(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
3989 
3990 	const VkPhysicalDeviceAccelerationStructureFeaturesKHR&	accelerationStructureFeaturesKHR = context.getAccelerationStructureFeatures();
3991 	if (m_params->build == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR && accelerationStructureFeaturesKHR.accelerationStructureHostCommands == DE_FALSE)
3992 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR::accelerationStructureHostCommands");
3993 
3994 	const VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR& maintenance1FeaturesKHR = context.getRayTracingMaintenance1Features();
3995 	if (maintenance1FeaturesKHR.rayTracingMaintenance1 == VK_FALSE)
3996 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR::rayTracingMaintenance1");
3997 
3998 	const VkPhysicalDeviceSynchronization2FeaturesKHR& synchronization2Features = context.getSynchronization2Features();
3999 	if (synchronization2Features.synchronization2 == VK_FALSE)
4000 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceSynchronization2FeaturesKHR::synchronization2");
4001 
4002 	if (m_params->type != CopyWithinPipelineParams::Type::AccessSBTReadBit)
4003 	{
4004 		context.requireDeviceFunctionality(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
4005 		const VkPhysicalDevicePushDescriptorPropertiesKHR&		pushDescriptorProperties = context.getPushDescriptorProperties();
4006 		if (pushDescriptorProperties.maxPushDescriptors < 32)
4007 			TCU_THROW(NotSupportedError, "Requires VK_KHR_push_descriptor extension");
4008 	}
4009 }
4010 
getRefImage(BlasPtr blas) const4011 auto CopyBlasInstance::getRefImage (BlasPtr blas) const -> de::MovePtr<BufferWithMemory>
4012 {
4013 	const deUint32							queueFamilyIndex			= m_context.getUniversalQueueFamilyIndex();
4014 	const VkQueue							queue						= m_context.getUniversalQueue();
4015 
4016 	const de::MovePtr<RayTracingProperties>	rtProps						= makeRayTracingProperties(m_context.getInstanceInterface(), m_context.getPhysicalDevice());
4017 	const deUint32							shaderGroupHandleSize		= rtProps->getShaderGroupHandleSize();
4018 	const deUint32							shaderGroupBaseAlignment	= rtProps->getShaderGroupBaseAlignment();
4019 
4020 	const VkImageCreateInfo					imageCreateInfo				= makeImageCreateInfo(m_params->width, m_params->height, m_format);
4021 	const VkImageSubresourceRange			imageSubresourceRange		= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
4022 	const de::MovePtr<ImageWithMemory>		image						= makeMovePtr<ImageWithMemory>(vk, device, allocator, imageCreateInfo, MemoryRequirement::Any);
4023 	const Move<VkImageView>					view						= makeImageView(vk, device, **image, VK_IMAGE_VIEW_TYPE_2D, m_format, imageSubresourceRange);
4024 
4025 	const deUint32							bufferSize					= (m_params->width * m_params->height * mapVkFormat(m_format).getPixelSize());
4026 	const VkBufferCreateInfo				bufferCreateInfo			= makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
4027 	de::MovePtr<BufferWithMemory>			buffer						= makeMovePtr<BufferWithMemory>(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible);
4028 
4029 	const VkImageSubresourceLayers			imageSubresourceLayers		= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
4030 	const VkBufferImageCopy					bufferCopyImageRegion		= makeBufferImageCopy(makeExtent3D(m_params->width, m_params->height, 1u), imageSubresourceLayers);
4031 
4032 	de::MovePtr<RayTracingPipeline>			rtPipeline					= makeMovePtr<RayTracingPipeline>();
4033 	rtPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,		*rgenShader, 0);
4034 	rtPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,	*chitShader, 1);
4035 	rtPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR,			*missShader, 2);
4036 
4037 	const Move<VkDescriptorPool>			descriptorPool				= DescriptorPoolBuilder()
4038 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2)
4039 		.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, 2)
4040 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
4041 	const Move<VkDescriptorSetLayout>		descriptorSetLayout			= DescriptorSetLayoutBuilder()
4042 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
4043 		.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
4044 		.build(vk, device);
4045 	const Move<VkDescriptorSet>				descriptorSet			= makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
4046 
4047 	const Move<VkPipelineLayout>			pipelineLayout				= makePipelineLayout(vk, device, *descriptorSetLayout);
4048 	Move<VkPipeline>						pipeline					= rtPipeline->createPipeline(vk, device, *pipelineLayout);
4049 
4050 	de::MovePtr<BufferWithMemory>			rgenSbt						= rtPipeline->createShaderBindingTable(vk, device, *pipeline, allocator,
4051 																											   shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1);
4052 	VkStridedDeviceAddressRegionKHR			rgenRegion					= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vk, device, **rgenSbt, 0),
4053 																											shaderGroupHandleSize, shaderGroupHandleSize);
4054 	de::MovePtr<BufferWithMemory>			chitSbt						= rtPipeline->createShaderBindingTable(vk, device, *pipeline, allocator,
4055 																											   shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1);
4056 	VkStridedDeviceAddressRegionKHR			chitRegion					= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vk, device, **chitSbt, 0),
4057 																											shaderGroupHandleSize, shaderGroupHandleSize);
4058 	de::MovePtr<BufferWithMemory>			missSbt						= rtPipeline->createShaderBindingTable(vk, device, *pipeline, allocator,
4059 																											   shaderGroupHandleSize, shaderGroupBaseAlignment, 2, 1);
4060 	VkStridedDeviceAddressRegionKHR			missRegion					= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vk, device, **missSbt, 0),
4061 																											shaderGroupHandleSize, shaderGroupHandleSize);
4062 	const VkStridedDeviceAddressRegionKHR	callRegion					= makeStridedDeviceAddressRegionKHR(VkDeviceAddress(0), 0, 0);
4063 
4064 	const VkClearValue						clearValue					= { { { 0.1f, 0.2f, 0.3f, 0.4f } } };
4065 
4066 	const VkImageMemoryBarrier2KHR			preClearImageImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR, 0,
4067 																								  VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
4068 																								  VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
4069 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4070 	const VkImageMemoryBarrier2KHR			postClearImageImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
4071 																								  VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR, VK_ACCESS_2_SHADER_READ_BIT_KHR,
4072 																								  VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL,
4073 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4074 	const VkDependencyInfoKHR				preClearImageDependency		= u::makeDependency(preClearImageImageBarrier);
4075 	const VkDependencyInfoKHR				postClearImageDependency	= u::makeDependency(postClearImageImageBarrier);
4076 
4077 
4078 	const VkImageMemoryBarrier2KHR			postTraceRaysImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR, VK_ACCESS_2_SHADER_WRITE_BIT_KHR,
4079 																								  VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_READ_BIT_KHR,
4080 																								  VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
4081 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4082 	const VkImageMemoryBarrier2KHR			postCopyImageImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,	VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
4083 																								  VK_PIPELINE_STAGE_2_HOST_BIT_KHR, VK_ACCESS_2_HOST_READ_BIT_KHR,
4084 																								  VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
4085 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4086 	const VkDependencyInfoKHR				postTraceRaysDependency		= u::makeDependency(postTraceRaysImageBarrier);
4087 	const VkDependencyInfoKHR				postCopyImageDependency		= u::makeDependency(postCopyImageImageBarrier);
4088 
4089 	const Move<VkCommandPool>				cmdPool						= createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
4090 	const Move<VkCommandBuffer>				cmdBuffer					= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4091 
4092 	auto									tlas						= makeTopLevelAccelerationStructure();
4093 	tlas->setBuildType(m_params->build);
4094 	tlas->setInstanceCount(1);
4095 	tlas->addInstance(blas, identityMatrix3x4, 0, (~0u), 0, VkGeometryInstanceFlagsKHR(0));
4096 	beginCommandBuffer(vk, *cmdBuffer);
4097 		tlas->createAndBuild(vk, device, *cmdBuffer, allocator);
4098 	endCommandBuffer(vk, *cmdBuffer);
4099 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4100 
4101 	const VkDescriptorImageInfo				descriptorImageInfo			= makeDescriptorImageInfo(VkSampler(), *view, VK_IMAGE_LAYOUT_GENERAL);
4102 	const VkWriteDescriptorSetAccelerationStructureKHR writeDescriptorTlas
4103 	{
4104 		VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	//  VkStructureType						sType;
4105 		nullptr,															//  const void*							pNext;
4106 		1,																	//  deUint32							accelerationStructureCount;
4107 		tlas->getPtr()														//  const VkAccelerationStructureKHR*	pAccelerationStructures;
4108 	};
4109 
4110 	DescriptorSetUpdateBuilder()
4111 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
4112 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &writeDescriptorTlas)
4113 		.update(vk, device);
4114 
4115 	beginCommandBuffer(vk, *cmdBuffer);
4116 		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
4117 		vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, nullptr);
4118 		vk.cmdPipelineBarrier2(*cmdBuffer, &preClearImageDependency);
4119 		vk.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1, &imageSubresourceRange);
4120 		vk.cmdPipelineBarrier2(*cmdBuffer, &postClearImageDependency);
4121 		cmdTraceRays(vk,
4122 			*cmdBuffer,
4123 			&rgenRegion,	// rgen
4124 			&missRegion,	// miss
4125 			&chitRegion,	// hit
4126 			&callRegion,	// call
4127 			m_params->width, m_params->height, 1);
4128 		vk.cmdPipelineBarrier2(*cmdBuffer, &postTraceRaysDependency);
4129 		vk.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, **buffer, 1u, &bufferCopyImageRegion);
4130 		vk.cmdPipelineBarrier2(*cmdBuffer, &postCopyImageDependency);
4131 	endCommandBuffer(vk, *cmdBuffer);
4132 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4133 
4134 	invalidateMappedMemoryRange(vk, device, buffer->getAllocation().getMemory(), buffer->getAllocation().getOffset(), bufferSize);
4135 
4136 	return buffer;
4137 }
4138 
iterate(void)4139 TestStatus CopyBlasInstance::iterate (void)
4140 {
4141 	const deUint32							queueFamilyIndex			= m_context.getUniversalQueueFamilyIndex();
4142 	const VkQueue							queue						= m_context.getUniversalQueue();
4143 
4144 	const de::MovePtr<RayTracingProperties>	rtProps						= makeRayTracingProperties(m_context.getInstanceInterface(), m_context.getPhysicalDevice());
4145 	const deUint32							shaderGroupHandleSize		= rtProps->getShaderGroupHandleSize();
4146 	const deUint32							shaderGroupBaseAlignment	= rtProps->getShaderGroupBaseAlignment();
4147 
4148 	const VkImageCreateInfo					imageCreateInfo				= makeImageCreateInfo(m_params->width, m_params->height, m_format);
4149 	const VkImageSubresourceRange			imageSubresourceRange		= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
4150 	const de::MovePtr<ImageWithMemory>		image						= makeMovePtr<ImageWithMemory>(vk, device, allocator, imageCreateInfo, MemoryRequirement::Any);
4151 	const Move<VkImageView>					view						= makeImageView(vk, device, **image, VK_IMAGE_VIEW_TYPE_2D, m_format, imageSubresourceRange);
4152 
4153 	const deUint32							bufferSize					= (m_params->width * m_params->height * mapVkFormat(m_format).getPixelSize());
4154 	const VkBufferCreateInfo				bufferCreateInfo			= makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
4155 	de::MovePtr<BufferWithMemory>			resultImageBuffer			= makeMovePtr<BufferWithMemory>(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible);
4156 
4157 	const VkImageSubresourceLayers			imageSubresourceLayers		= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
4158 	const VkBufferImageCopy					bufferCopyImageRegion		= makeBufferImageCopy(makeExtent3D(m_params->width, m_params->height, 1u), imageSubresourceLayers);
4159 
4160 	de::MovePtr<RayTracingPipeline>			rtPipeline					= makeMovePtr<RayTracingPipeline>();
4161 	rtPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,		*rgenShader, 0);
4162 	rtPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,	*chitShader, 1);
4163 	rtPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR,			*missShader, 2);
4164 
4165 	const Move<VkDescriptorSetLayout>		descriptorSetLayout			= DescriptorSetLayoutBuilder()
4166 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
4167 		.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
4168 		.build(vk, device, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
4169 
4170 	const Move<VkPipelineLayout>			pipelineLayout				= makePipelineLayout(vk, device, *descriptorSetLayout);
4171 	Move<VkPipeline>						pipeline					= rtPipeline->createPipeline(vk, device, *pipelineLayout);
4172 
4173 	de::MovePtr<BufferWithMemory>			rgenSbt						= rtPipeline->createShaderBindingTable(vk, device, *pipeline, allocator,
4174 																											   shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1);
4175 	VkStridedDeviceAddressRegionKHR			rgenRegion					= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vk, device, **rgenSbt, 0),
4176 																											shaderGroupHandleSize, shaderGroupHandleSize);
4177 	de::MovePtr<BufferWithMemory>			chitSbt						= rtPipeline->createShaderBindingTable(vk, device, *pipeline, allocator,
4178 																											   shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1);
4179 	VkStridedDeviceAddressRegionKHR			chitRegion					= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vk, device, **chitSbt, 0),
4180 																											shaderGroupHandleSize, shaderGroupHandleSize);
4181 	de::MovePtr<BufferWithMemory>			missSbt						= rtPipeline->createShaderBindingTable(vk, device, *pipeline, allocator,
4182 																											   shaderGroupHandleSize, shaderGroupBaseAlignment, 2, 1);
4183 	VkStridedDeviceAddressRegionKHR			missRegion					= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vk, device, **missSbt, 0),
4184 																											shaderGroupHandleSize, shaderGroupHandleSize);
4185 	const VkStridedDeviceAddressRegionKHR	callRegion					= makeStridedDeviceAddressRegionKHR(VkDeviceAddress(0), 0, 0);
4186 
4187 	const VkClearValue						clearValue					= { { { 0.1f, 0.2f, 0.3f, 0.4f } } };
4188 
4189 	const VkImageMemoryBarrier2KHR			preClearImageImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR, 0,
4190 																								  VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
4191 																								  VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
4192 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4193 	const VkImageMemoryBarrier2KHR			postClearImageImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
4194 																								  VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR, VK_ACCESS_2_SHADER_READ_BIT_KHR,
4195 																								  VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL,
4196 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4197 	const VkDependencyInfoKHR				preClearImageDependency		= u::makeDependency(preClearImageImageBarrier);
4198 	const VkDependencyInfoKHR				postClearImageDependency	= u::makeDependency(postClearImageImageBarrier);
4199 
4200 
4201 	const VkImageMemoryBarrier2KHR			postTraceRaysImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR, VK_ACCESS_2_SHADER_WRITE_BIT_KHR,
4202 																								  VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_READ_BIT_KHR,
4203 																								  VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
4204 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4205 	const VkImageMemoryBarrier2KHR			postCopyImageImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,	VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
4206 																								  VK_PIPELINE_STAGE_2_HOST_BIT_KHR, VK_ACCESS_2_HOST_READ_BIT_KHR,
4207 																								  VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
4208 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4209 	const VkDependencyInfoKHR				postTraceRaysDependency		= u::makeDependency(postTraceRaysImageBarrier);
4210 	const VkDependencyInfoKHR				postCopyImageDependency		= u::makeDependency(postCopyImageImageBarrier);
4211 	const VkPipelineStageFlags2KHR			srcStageMask				= m_params->type == CopyWithinPipelineParams::Type::StageASCopyBit
4212 																			? VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR
4213 																			: VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT_KHR;
4214 	const VkMemoryBarrier2KHR				copyBlasMemoryBarrier		= makeMemoryBarrier2(srcStageMask, VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
4215 																							 VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
4216 																							 VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR);
4217 	const VkDependencyInfoKHR				copyBlasDependency			= u::makeDependency(copyBlasMemoryBarrier);
4218 
4219 
4220 	const Move<VkCommandPool>				cmdPool						= createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
4221 	const Move<VkCommandBuffer>				cmdBuffer					= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4222 
4223 	std::vector<VkDeviceSize>				blasSize					(1);
4224 	BlasPtr									blas1						(makeBottomLevelAccelerationStructure().release());
4225 
4226 	// After this block the blas1 stays on device or host respectively to its build type.
4227 	// Once it is created it is asked for the serialization size that will be used for a
4228 	// creation of an empty blas2. Probably this size will be bigger than it is needed but
4229 	// one thing that is important is it must not be less.
4230 	{
4231 		const VkQueryType query = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR;
4232 		Move<VkQueryPool> queryPoolSize = makeQueryPool(vk, device, query, 1);
4233 		beginCommandBuffer(vk, *cmdBuffer);
4234 			blas1->setBuildType(m_params->build);
4235 			blas1->setGeometryData(	{
4236 					{ 0.0, 0.0, 0.0 },
4237 					{ 1.0, 0.0, 0.0 },
4238 					{ 0.0, 1.0, 0.0 }}, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
4239 			blas1->createAndBuild(vk, device, *cmdBuffer, allocator);
4240 			queryAccelerationStructureSize(vk, device, *cmdBuffer, { *blas1->getPtr() }, m_params->build, *queryPoolSize, query, 0u, blasSize);
4241 		endCommandBuffer(vk, *cmdBuffer);
4242 		submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4243 		if (m_params->build == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
4244 		{
4245 			VK_CHECK(vk.getQueryPoolResults(device, *queryPoolSize, 0u, 1, sizeof(VkDeviceSize), blasSize.data(),
4246 											sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
4247 		}
4248 	}
4249 
4250 	de::MovePtr<BufferWithMemory>			referenceImageBuffer	= getRefImage(blas1);
4251 
4252 	// Create blas2 as empty struct
4253 	BlasPtr									blas2					(makeBottomLevelAccelerationStructure().release());
4254 	blas2->create(vk, device, allocator, blasSize[0]);
4255 
4256 	auto									tlas					= makeTopLevelAccelerationStructure();
4257 	tlas->setBuildType(m_params->build);
4258 	tlas->setInstanceCount(1);
4259 	tlas->addInstance(blas2, identityMatrix3x4, 0, (~0u), 0, VkGeometryInstanceFlagsKHR(0));
4260 
4261 	const VkCopyAccelerationStructureInfoKHR copyBlasInfo
4262 	{
4263 		VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR,		// VkStructureType						sType;
4264 		nullptr,													// const void*							pNext;
4265 		*blas1->getPtr(),											// VkAccelerationStructureKHR			src;
4266 		*blas2->getPtr(),											// VkAccelerationStructureKHR			dst;
4267 		VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR				// VkCopyAccelerationStructureModeKHR	mode;
4268 	};
4269 
4270 	beginCommandBuffer(vk, *cmdBuffer);
4271 		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
4272 
4273 		if (m_params->build == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
4274 		{
4275 			vk.cmdCopyAccelerationStructureKHR(*cmdBuffer, &copyBlasInfo);
4276 			vk.cmdPipelineBarrier2(*cmdBuffer, &copyBlasDependency);
4277 		}
4278 		else VK_CHECK(vk.copyAccelerationStructureKHR(device, VkDeferredOperationKHR(0), &copyBlasInfo));
4279 
4280 		tlas->createAndBuild(vk, device, *cmdBuffer, allocator);
4281 
4282 		const VkDescriptorImageInfo				descriptorImageInfo			= makeDescriptorImageInfo(VkSampler(), *view, VK_IMAGE_LAYOUT_GENERAL);
4283 		const VkWriteDescriptorSetAccelerationStructureKHR writeDescriptorTlas
4284 		{
4285 			VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	//  VkStructureType						sType;
4286 			nullptr,															//  const void*							pNext;
4287 			1,																	//  deUint32							accelerationStructureCount;
4288 			tlas->getPtr()														//  const VkAccelerationStructureKHR*	pAccelerationStructures;
4289 		};
4290 
4291 		DescriptorSetUpdateBuilder()
4292 			.writeSingle(VkDescriptorSet(), DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
4293 			.writeSingle(VkDescriptorSet(), DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &writeDescriptorTlas)
4294 			.updateWithPush(vk, *cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 0, 2);
4295 
4296 		vk.cmdPipelineBarrier2(*cmdBuffer, &preClearImageDependency);
4297 		vk.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1, &imageSubresourceRange);
4298 		vk.cmdPipelineBarrier2(*cmdBuffer, &postClearImageDependency);
4299 
4300 		cmdTraceRays(vk,
4301 			*cmdBuffer,
4302 			&rgenRegion,	// rgen
4303 			&missRegion,	// miss
4304 			&chitRegion,	// hit
4305 			&callRegion,	// call
4306 			m_params->width, m_params->height, 1);
4307 
4308 		vk.cmdPipelineBarrier2(*cmdBuffer, &postTraceRaysDependency);
4309 		vk.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, **resultImageBuffer, 1u, &bufferCopyImageRegion);
4310 		vk.cmdPipelineBarrier2(*cmdBuffer, &postCopyImageDependency);
4311 
4312 	endCommandBuffer(vk, *cmdBuffer);
4313 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4314 
4315 	invalidateMappedMemoryRange(vk, device, resultImageBuffer->getAllocation().getMemory(), resultImageBuffer->getAllocation().getOffset(), bufferSize);
4316 
4317 	const void*	referenceImageData	= referenceImageBuffer->getAllocation().getHostPtr();
4318 	const void*	resultImageData		= resultImageBuffer->getAllocation().getHostPtr();
4319 
4320 	return (deMemCmp(referenceImageData, resultImageData, bufferSize) == 0) ? TestStatus::pass("") : TestStatus::fail("Reference and result images differ");
4321 }
4322 
getBufferSizeForSBT(const deUint32 & groupCount,const deUint32 & shaderGroupHandleSize,const deUint32 & shaderGroupBaseAlignment) const4323 VkDeviceSize CopySBTInstance::getBufferSizeForSBT (const deUint32& groupCount, const deUint32&	shaderGroupHandleSize, const deUint32& shaderGroupBaseAlignment) const
4324 {
4325 	DE_UNREF(shaderGroupBaseAlignment);
4326 	return (groupCount * deAlign32(shaderGroupHandleSize, shaderGroupHandleSize));
4327 }
4328 
getBufferForSBT(const deUint32 & groupCount,const deUint32 & shaderGroupHandleSize,const deUint32 & shaderGroupBaseAlignment) const4329 de::MovePtr<BufferWithMemory> CopySBTInstance::getBufferForSBT (const deUint32& groupCount, const deUint32&	shaderGroupHandleSize, const deUint32& shaderGroupBaseAlignment) const
4330 {
4331 	const VkDeviceSize			sbtSize				= getBufferSizeForSBT(groupCount, shaderGroupHandleSize, shaderGroupBaseAlignment);
4332 	const VkBufferUsageFlags	sbtFlags			= VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
4333 	const VkBufferCreateInfo	sbtCreateInfo		= makeBufferCreateInfo(sbtSize, sbtFlags);
4334 	const MemoryRequirement		sbtMemRequirements	= MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress;
4335 
4336 	return makeMovePtr<BufferWithMemory>(vk, device, allocator, sbtCreateInfo, sbtMemRequirements);
4337 }
4338 
iterate(void)4339 TestStatus CopySBTInstance::iterate (void)
4340 {
4341 	const deUint32							queueFamilyIndex			= m_context.getUniversalQueueFamilyIndex();
4342 	const VkQueue							queue						= m_context.getUniversalQueue();
4343 
4344 	const de::MovePtr<RayTracingProperties>	rtProps						= makeRayTracingProperties(m_context.getInstanceInterface(), m_context.getPhysicalDevice());
4345 	const deUint32							shaderGroupHandleSize		= rtProps->getShaderGroupHandleSize();
4346 	const deUint32							shaderGroupBaseAlignment	= rtProps->getShaderGroupBaseAlignment();
4347 
4348 	const VkImageCreateInfo					imageCreateInfo				= makeImageCreateInfo(m_params->width, m_params->height, m_format);
4349 	const VkImageSubresourceRange			imageSubresourceRange		= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
4350 	const de::MovePtr<ImageWithMemory>		image						= makeMovePtr<ImageWithMemory>(vk, device, allocator, imageCreateInfo, MemoryRequirement::Any);
4351 	const Move<VkImageView>					view						= makeImageView(vk, device, **image, VK_IMAGE_VIEW_TYPE_2D, m_format, imageSubresourceRange);
4352 
4353 	const deUint32							bufferSize					= (m_params->width * m_params->height * mapVkFormat(m_format).getPixelSize());
4354 	const VkBufferCreateInfo				bufferCreateInfo			= makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
4355 	de::MovePtr<BufferWithMemory>			referenceImageBuffer		= makeMovePtr<BufferWithMemory>(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible);
4356 	de::MovePtr<BufferWithMemory>			resultImageBuffer			= makeMovePtr<BufferWithMemory>(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible);
4357 
4358 	const VkImageSubresourceLayers			imageSubresourceLayers		= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
4359 	const VkBufferImageCopy					bufferCopyImageRegion		= makeBufferImageCopy(makeExtent3D(m_params->width, m_params->height, 1u), imageSubresourceLayers);
4360 
4361 	de::MovePtr<RayTracingPipeline>			rtPipeline					= makeMovePtr<RayTracingPipeline>();
4362 	rtPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,		*rgenShader, 0);
4363 	rtPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,	*chitShader, 1);
4364 	rtPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR,			*missShader, 2);
4365 
4366 	const Move<VkDescriptorPool>			descriptorPool				= DescriptorPoolBuilder()
4367 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
4368 		.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
4369 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
4370 	const Move<VkDescriptorSetLayout>		descriptorSetLayout			= DescriptorSetLayoutBuilder()
4371 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
4372 		.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
4373 		.build(vk, device);
4374 	const Move<VkDescriptorSet>				descriptorSet				= makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
4375 
4376 	const Move<VkPipelineLayout>			pipelineLayout				= makePipelineLayout(vk, device, *descriptorSetLayout);
4377 	Move<VkPipeline>						pipeline					= rtPipeline->createPipeline(vk, device, *pipelineLayout);
4378 
4379 	de::MovePtr<BufferWithMemory>			sourceRgenSbt				= rtPipeline->createShaderBindingTable(vk, device, *pipeline, allocator,
4380 																											   shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1,
4381 																											   VkBufferCreateFlags(0), VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
4382 	VkStridedDeviceAddressRegionKHR			sourceRgenRegion			= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vk, device, **sourceRgenSbt, 0),
4383 																											shaderGroupHandleSize, shaderGroupHandleSize);
4384 	de::MovePtr<BufferWithMemory>			copyRgenSbt					= getBufferForSBT(1, shaderGroupHandleSize, shaderGroupBaseAlignment);
4385 	VkStridedDeviceAddressRegionKHR			copyRgenRegion				= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vk, device, **copyRgenSbt, 0),
4386 																											shaderGroupHandleSize, shaderGroupHandleSize);
4387 	de::MovePtr<BufferWithMemory>			chitSbt						= rtPipeline->createShaderBindingTable(vk, device, *pipeline, allocator,
4388 																											   shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1);
4389 	VkStridedDeviceAddressRegionKHR			chitRegion					= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vk, device, **chitSbt, 0),
4390 																											shaderGroupHandleSize, shaderGroupHandleSize);
4391 	de::MovePtr<BufferWithMemory>			missSbt						= rtPipeline->createShaderBindingTable(vk, device, *pipeline, allocator,
4392 																											   shaderGroupHandleSize, shaderGroupBaseAlignment, 2, 1);
4393 	VkStridedDeviceAddressRegionKHR			missRegion					= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vk, device, **missSbt, 0),
4394 																											shaderGroupHandleSize, shaderGroupHandleSize);
4395 	const VkStridedDeviceAddressRegionKHR	callRegion					= makeStridedDeviceAddressRegionKHR(VkDeviceAddress(0), 0, 0);
4396 
4397 	const VkClearValue						clearValue					= { { { 0.1f, 0.2f, 0.3f, 0.4f } } };
4398 
4399 	const VkImageMemoryBarrier2KHR			preClearImageImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR, 0,
4400 																								  VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
4401 																								  VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
4402 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4403 	const VkImageMemoryBarrier2KHR			postClearImageImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
4404 																								  VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR, VK_ACCESS_2_SHADER_READ_BIT_KHR,
4405 																								  VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL,
4406 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4407 	const VkDependencyInfoKHR				preClearImageDependency		= u::makeDependency(preClearImageImageBarrier);
4408 	const VkDependencyInfoKHR				postClearImageDependency	= u::makeDependency(postClearImageImageBarrier);
4409 
4410 
4411 	const VkImageMemoryBarrier2KHR			postTraceRaysImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR, VK_ACCESS_2_SHADER_WRITE_BIT_KHR,
4412 																								  VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_READ_BIT_KHR,
4413 																								  VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
4414 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4415 	const VkImageMemoryBarrier2KHR			postCopyImageImageBarrier	= makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,	VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
4416 																								  VK_PIPELINE_STAGE_2_HOST_BIT_KHR, VK_ACCESS_2_HOST_READ_BIT_KHR,
4417 																								  VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
4418 																								  **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4419 	const VkDependencyInfoKHR				postTraceRaysDependency		= u::makeDependency(postTraceRaysImageBarrier);
4420 	const VkDependencyInfoKHR				postCopyImageDependency		= u::makeDependency(postCopyImageImageBarrier);
4421 
4422 	const Move<VkCommandPool>				cmdPool						= createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
4423 	const Move<VkCommandBuffer>				cmdBuffer					= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4424 
4425 	auto									tlas						= makeTopLevelAccelerationStructure();
4426 	BlasPtr									blas						(makeBottomLevelAccelerationStructure().release());
4427 	blas->setBuildType(m_params->build);
4428 	blas->setGeometryData(	{
4429 			{ 0.0, 0.0, 0.0 },
4430 			{ 1.0, 0.0, 0.0 },
4431 			{ 0.0, 1.0, 0.0 }}, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
4432 	tlas->setBuildType(m_params->build);
4433 	tlas->setInstanceCount(1);
4434 	tlas->addInstance(blas, identityMatrix3x4, 0, (~0u), 0, VkGeometryInstanceFlagsKHR(0));
4435 	beginCommandBuffer(vk, *cmdBuffer);
4436 		blas->createAndBuild(vk, device, *cmdBuffer, allocator);
4437 		tlas->createAndBuild(vk, device, *cmdBuffer, allocator);
4438 	endCommandBuffer(vk, *cmdBuffer);
4439 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4440 
4441 	const VkDescriptorImageInfo				descriptorImageInfo			= makeDescriptorImageInfo(VkSampler(), *view, VK_IMAGE_LAYOUT_GENERAL);
4442 	const VkWriteDescriptorSetAccelerationStructureKHR writeDescriptorTlas
4443 	{
4444 		VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	//  VkStructureType						sType;
4445 		nullptr,															//  const void*							pNext;
4446 		1,																	//  deUint32							accelerationStructureCount;
4447 		tlas->getPtr()														//  const VkAccelerationStructureKHR*	pAccelerationStructures;
4448 	};
4449 
4450 	DescriptorSetUpdateBuilder()
4451 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
4452 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &writeDescriptorTlas)
4453 		.update(vk, device);
4454 
4455 	beginCommandBuffer(vk, *cmdBuffer);
4456 		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
4457 		vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, nullptr);
4458 		vk.cmdPipelineBarrier2(*cmdBuffer, &preClearImageDependency);
4459 		vk.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1, &imageSubresourceRange);
4460 		vk.cmdPipelineBarrier2(*cmdBuffer, &postClearImageDependency);
4461 		cmdTraceRays(vk,
4462 			*cmdBuffer,
4463 			&sourceRgenRegion,	// rgen
4464 			&missRegion,		// miss
4465 			&chitRegion,		// hit
4466 			&callRegion,		// call
4467 			m_params->width, m_params->height, 1);
4468 		vk.cmdPipelineBarrier2(*cmdBuffer, &postTraceRaysDependency);
4469 		vk.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, **referenceImageBuffer, 1u, &bufferCopyImageRegion);
4470 		vk.cmdPipelineBarrier2(*cmdBuffer, &postCopyImageDependency);
4471 	endCommandBuffer(vk, *cmdBuffer);
4472 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4473 
4474 
4475 	const VkBufferCopy bufferCopy
4476 	{
4477 		0,	// VkDeviceSize srcOffset;
4478 		0,	// VkDeviceSize srcOffset;
4479 		getBufferSizeForSBT(1, shaderGroupHandleSize, shaderGroupBaseAlignment)
4480 	};
4481 	const VkMemoryBarrier2KHR				postCopySBTMemoryBarrier	= makeMemoryBarrier2(VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR,
4482 																							 VkAccessFlags2KHR(0),
4483 																							 VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR,
4484 																							 VK_ACCESS_2_SHADER_BINDING_TABLE_READ_BIT_KHR);
4485 	const VkDependencyInfoKHR				postClearImgCopySBTDependency	= u::makeDependency(postCopySBTMemoryBarrier, postClearImageImageBarrier);
4486 
4487 	beginCommandBuffer(vk, *cmdBuffer);
4488 		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
4489 		vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, nullptr);
4490 		vk.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1, &imageSubresourceRange);
4491 		vk.cmdCopyBuffer(*cmdBuffer, **sourceRgenSbt, **copyRgenSbt, 1, &bufferCopy);
4492 		vk.cmdPipelineBarrier2(*cmdBuffer, &postClearImgCopySBTDependency);
4493 		cmdTraceRays(vk,
4494 			*cmdBuffer,
4495 			&copyRgenRegion,	// rgen
4496 			&missRegion,		// miss
4497 			&chitRegion,		// hit
4498 			&callRegion,		// call
4499 			m_params->width, m_params->height, 1);
4500 		vk.cmdPipelineBarrier2(*cmdBuffer, &postTraceRaysDependency);
4501 		vk.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, **resultImageBuffer, 1u, &bufferCopyImageRegion);
4502 		vk.cmdPipelineBarrier2(*cmdBuffer, &postCopyImageDependency);
4503 	endCommandBuffer(vk, *cmdBuffer);
4504 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4505 
4506 	invalidateMappedMemoryRange(vk, device, referenceImageBuffer->getAllocation().getMemory(), referenceImageBuffer->getAllocation().getOffset(), bufferSize);
4507 	invalidateMappedMemoryRange(vk, device, resultImageBuffer->getAllocation().getMemory(), resultImageBuffer->getAllocation().getOffset(), bufferSize);
4508 
4509 	const void* referenceImageDataPtr	= referenceImageBuffer->getAllocation().getHostPtr();
4510 	const void* resultImageDataPtr		= resultImageBuffer->getAllocation().getHostPtr();
4511 
4512 	return (deMemCmp(referenceImageDataPtr, resultImageDataPtr, bufferSize) == 0) ? TestStatus::pass("") : TestStatus::fail("");
4513 }
4514 
4515 class ASUpdateCase : public RayTracingASBasicTestCase
4516 {
4517 public:
4518 					ASUpdateCase	(tcu::TestContext& context, const char* name, const TestParams& data);
4519 					~ASUpdateCase	(void);
4520 
4521 	TestInstance*	createInstance	(Context& context) const override;
4522 };
4523 
4524 class ASUpdateInstance : public RayTracingASBasicTestInstance
4525 {
4526 public:
4527 									ASUpdateInstance	(Context& context, const TestParams& data);
4528 									~ASUpdateInstance	(void) = default;
4529 	tcu::TestStatus					iterate				(void) override;
4530 
4531 private:
4532 	TestParams						m_data;
4533 };
4534 
ASUpdateCase(tcu::TestContext & context,const char * name,const TestParams & data)4535 ASUpdateCase::ASUpdateCase (tcu::TestContext& context, const char* name, const TestParams& data)
4536 	: RayTracingASBasicTestCase	(context, name, data)
4537 {
4538 }
4539 
~ASUpdateCase(void)4540 ASUpdateCase::~ASUpdateCase	(void)
4541 {
4542 }
4543 
createInstance(Context & context) const4544 TestInstance* ASUpdateCase::createInstance (Context& context) const
4545 {
4546 	return new ASUpdateInstance(context, m_data);
4547 }
4548 
4549 
ASUpdateInstance(Context & context,const TestParams & data)4550 ASUpdateInstance::ASUpdateInstance (Context& context, const TestParams& data)
4551 	: RayTracingASBasicTestInstance		(context, data)
4552 	, m_data				(data)
4553 {
4554 }
4555 
iterate(void)4556 TestStatus ASUpdateInstance::iterate (void)
4557 {
4558 	const InstanceInterface&			vki									= m_context.getInstanceInterface();
4559 	const DeviceInterface&				vkd									= m_context.getDeviceInterface();
4560 	const VkDevice						device								= m_context.getDevice();
4561 	const VkPhysicalDevice				physicalDevice						= m_context.getPhysicalDevice();
4562 	const deUint32						queueFamilyIndex					= m_context.getUniversalQueueFamilyIndex();
4563 	const VkQueue						queue								= m_context.getUniversalQueue();
4564 	Allocator&							allocator							= m_context.getDefaultAllocator();
4565 	const deUint32						pixelCount							= m_data.width * m_data.height;
4566 	const deUint32						shaderGroupHandleSize				= getShaderGroupSize(vki, physicalDevice);
4567 	const deUint32						shaderGroupBaseAlignment			= getShaderGroupBaseAlignment(vki, physicalDevice);
4568 
4569 	const Move<VkDescriptorSetLayout>	descriptorSetLayout					= DescriptorSetLayoutBuilder()
4570 																					.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
4571 																					.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
4572 																					.build(vkd, device);
4573 	const Move<VkDescriptorPool>		descriptorPool						= DescriptorPoolBuilder()
4574 																					.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
4575 																					.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
4576 																					.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
4577 	const Move<VkDescriptorSet>			descriptorSet						= makeDescriptorSet(vkd, device, *descriptorPool, *descriptorSetLayout);
4578 	const Move<VkPipelineLayout>		pipelineLayout						= makePipelineLayout(vkd, device, descriptorSetLayout.get());
4579 
4580 	de::MovePtr<RayTracingPipeline>		rayTracingPipeline					= de::newMovePtr<RayTracingPipeline>();
4581 	m_data.testConfiguration->initRayTracingShaders(rayTracingPipeline, m_context, m_data);
4582 	Move<VkPipeline>					pipeline							= rayTracingPipeline->createPipeline(vkd, device, *pipelineLayout);
4583 
4584 	de::MovePtr<BufferWithMemory>		raygenShaderBindingTable;
4585 	de::MovePtr<BufferWithMemory>		hitShaderBindingTable;
4586 	de::MovePtr<BufferWithMemory>		missShaderBindingTable;
4587 	m_data.testConfiguration->initShaderBindingTables(rayTracingPipeline, m_context, m_data, *pipeline, shaderGroupHandleSize, shaderGroupBaseAlignment, raygenShaderBindingTable, hitShaderBindingTable, missShaderBindingTable);
4588 
4589 	const VkStridedDeviceAddressRegionKHR	raygenShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, raygenShaderBindingTable->get(),	0),	shaderGroupHandleSize,	shaderGroupHandleSize);
4590 	const VkStridedDeviceAddressRegionKHR	missShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(),		0),	shaderGroupHandleSize,	shaderGroupHandleSize);
4591 	const VkStridedDeviceAddressRegionKHR	hitShaderBindingTableRegion			= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitShaderBindingTable->get(),		0),	shaderGroupHandleSize,	shaderGroupHandleSize);
4592 	const VkStridedDeviceAddressRegionKHR	callableShaderBindingTableRegion	= makeStridedDeviceAddressRegionKHR(DE_NULL,																	0,						0);
4593 
4594 	const VkFormat						imageFormat							= m_data.testConfiguration->getResultImageFormat();
4595 	const VkImageCreateInfo				imageCreateInfo						= makeImageCreateInfo(m_data.width, m_data.height, imageFormat);
4596 	const VkImageSubresourceRange		imageSubresourceRange				= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
4597 	const de::MovePtr<ImageWithMemory>	image								= de::MovePtr<ImageWithMemory>(new ImageWithMemory(vkd, device, allocator, imageCreateInfo, MemoryRequirement::Any));
4598 	const Move<VkImageView>				imageView							= makeImageView(vkd, device, **image, VK_IMAGE_VIEW_TYPE_2D, imageFormat, imageSubresourceRange);
4599 
4600 	const VkBufferCreateInfo			resultBufferCreateInfo				= makeBufferCreateInfo(pixelCount*m_data.testConfiguration->getResultImageFormatSize(), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
4601 	const VkImageSubresourceLayers		resultBufferImageSubresourceLayers	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
4602 	const VkBufferImageCopy				resultBufferImageRegion				= makeBufferImageCopy(makeExtent3D(m_data.width, m_data.height, 1u), resultBufferImageSubresourceLayers);
4603 	de::MovePtr<BufferWithMemory>		resultBuffer						= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible));
4604 
4605 	const VkDescriptorImageInfo			descriptorImageInfo					= makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
4606 
4607 	const Move<VkCommandPool>			cmdPool								= createCommandPool(vkd, device, 0, queueFamilyIndex);
4608 	const Move<VkCommandBuffer>			cmdBuffer							= allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4609 
4610 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	bottomLevelAccelerationStructures;
4611 	de::MovePtr<TopLevelAccelerationStructure>						topLevelAccelerationStructure;
4612 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	bottomLevelAccelerationStructureCopies;
4613 	de::MovePtr<TopLevelAccelerationStructure>						topLevelAccelerationStructureCopy;
4614 	std::vector<de::SharedPtr<SerialStorage>>						bottomSerialized;
4615 	std::vector<de::SharedPtr<SerialStorage>>						topSerialized;
4616 	std::vector<VkDeviceSize>			accelerationCompactedSizes;
4617 	std::vector<VkDeviceSize>			accelerationSerialSizes;
4618 	Move<VkQueryPool>					m_queryPoolCompact;
4619 	Move<VkQueryPool>					m_queryPoolSerial;
4620 
4621 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
4622 	{
4623 		const VkImageMemoryBarrier				preImageBarrier = makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT,
4624 			VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
4625 			**image, imageSubresourceRange);
4626 		cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &preImageBarrier);
4627 		const VkClearValue						clearValue = m_data.testConfiguration->getClearValue();
4628 		vkd.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1, &imageSubresourceRange);
4629 		const VkImageMemoryBarrier				postImageBarrier = makeImageMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
4630 			VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL,
4631 			**image, imageSubresourceRange);
4632 		cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, &postImageBarrier);
4633 
4634 		// build bottom level acceleration structures and their copies ( only when we are testing copying bottom level acceleration structures )
4635 		bool									bottomCompact		= m_data.operationType == OP_COMPACT && m_data.operationTarget == OT_BOTTOM_ACCELERATION;
4636 		const bool								buildWithoutGeom	= (m_data.emptyASCase == EmptyAccelerationStructureCase::NO_GEOMETRIES_BOTTOM);
4637 		const bool								bottomNoPrimitives	= (m_data.emptyASCase == EmptyAccelerationStructureCase::NO_PRIMITIVES_BOTTOM);
4638 		const bool								topNoPrimitives		= (m_data.emptyASCase == EmptyAccelerationStructureCase::NO_PRIMITIVES_TOP);
4639 		const bool								inactiveInstances	= (m_data.emptyASCase == EmptyAccelerationStructureCase::INACTIVE_INSTANCES);
4640 		bottomLevelAccelerationStructures							= m_data.testConfiguration->initBottomAccelerationStructures(m_context, m_data);
4641 		VkBuildAccelerationStructureFlagsKHR	allowCompactionFlag	= VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR;
4642 		VkBuildAccelerationStructureFlagsKHR	emptyCompactionFlag	= VkBuildAccelerationStructureFlagsKHR(0);
4643 		VkBuildAccelerationStructureFlagsKHR	bottomCompactFlags	= (bottomCompact ? allowCompactionFlag : emptyCompactionFlag);
4644 		VkBuildAccelerationStructureFlagsKHR	bottomBuildFlags	= m_data.buildFlags | bottomCompactFlags;
4645 		std::vector<VkAccelerationStructureKHR>	accelerationStructureHandles;
4646 		std::vector<VkDeviceSize>				bottomBlasCompactSize;
4647 		std::vector<VkDeviceSize>				bottomBlasSerialSize;
4648 
4649 		for (auto& blas : bottomLevelAccelerationStructures)
4650 		{
4651 			blas->setBuildType						(m_data.buildType);
4652 			blas->setBuildFlags						(bottomBuildFlags);
4653 			blas->setUseArrayOfPointers				(m_data.bottomUsesAOP);
4654 			blas->setCreateGeneric					(m_data.bottomGeneric);
4655 			blas->setCreationBufferUnbounded		(m_data.bottomUnboundedCreation);
4656 			blas->setBuildWithoutGeometries			(buildWithoutGeom);
4657 			blas->setBuildWithoutPrimitives			(bottomNoPrimitives);
4658 			blas->createAndBuild					(vkd, device, *cmdBuffer, allocator);
4659 			accelerationStructureHandles.push_back	(*(blas->getPtr()));
4660 		}
4661 
4662 		auto bottomLevelAccelerationStructuresPtr								= &bottomLevelAccelerationStructures;
4663 		// build top level acceleration structures and their copies ( only when we are testing copying top level acceleration structures )
4664 		bool									topCompact			= m_data.operationType == OP_COMPACT && m_data.operationTarget == OT_TOP_ACCELERATION;
4665 		VkBuildAccelerationStructureFlagsKHR	topCompactFlags		= (topCompact ? allowCompactionFlag : emptyCompactionFlag);
4666 		VkBuildAccelerationStructureFlagsKHR	topBuildFlags		= m_data.buildFlags | topCompactFlags;
4667 		std::vector<VkAccelerationStructureKHR> topLevelStructureHandles;
4668 		std::vector<VkDeviceSize>				topBlasCompactSize;
4669 		std::vector<VkDeviceSize>				topBlasSerialSize;
4670 
4671 		topLevelAccelerationStructure								= m_data.testConfiguration->initTopAccelerationStructure(m_context, m_data, *bottomLevelAccelerationStructuresPtr);
4672 		topLevelAccelerationStructure->setBuildType					(m_data.buildType);
4673 		topLevelAccelerationStructure->setBuildFlags				(topBuildFlags);
4674 		topLevelAccelerationStructure->setBuildWithoutPrimitives	(topNoPrimitives);
4675 		topLevelAccelerationStructure->setUseArrayOfPointers		(m_data.topUsesAOP);
4676 		topLevelAccelerationStructure->setCreateGeneric				(m_data.topGeneric);
4677 		topLevelAccelerationStructure->setCreationBufferUnbounded	(m_data.topUnboundedCreation);
4678 		topLevelAccelerationStructure->setInactiveInstances			(inactiveInstances);
4679 		topLevelAccelerationStructure->createAndBuild				(vkd, device, *cmdBuffer, allocator);
4680 		topLevelStructureHandles.push_back							(*(topLevelAccelerationStructure->getPtr()));
4681 
4682 		const VkMemoryBarrier postBuildBarrier = makeMemoryBarrier(VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR);
4683 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, &postBuildBarrier);
4684 
4685 		if (m_data.updateCase == UpdateCase::VERTICES)
4686 		{
4687 			for (auto& blas : bottomLevelAccelerationStructures)
4688 			{
4689 				const std::vector<tcu::Vec3> vertices =
4690 				{
4691 					tcu::Vec3(0.0f, 0.0f, -0.5f),
4692 					tcu::Vec3(0.5f, 0.0f, -0.5f),
4693 					tcu::Vec3(0.0f, 0.5f, -0.5f),
4694 				};
4695 				const std::vector<deUint32> indices =
4696 				{
4697 					0,
4698 					1,
4699 					2
4700 				};
4701 				de::SharedPtr<RaytracedGeometryBase> geometry;
4702 				geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_TRIANGLES_KHR, m_data.vertexFormat, m_data.indexType);
4703 
4704 				for (auto it = begin(vertices), eit = end(vertices); it != eit; ++it)
4705 					geometry->addVertex(*it);
4706 
4707 				if (m_data.indexType != VK_INDEX_TYPE_NONE_KHR)
4708 				{
4709 					for (auto it = begin(indices), eit = end(indices); it != eit; ++it)
4710 						geometry->addIndex(*it);
4711 				}
4712 				blas->updateGeometry(0, geometry);
4713 				blas->build(vkd, device, *cmdBuffer, blas.get());
4714 			}
4715 		}
4716 		else if (m_data.updateCase == UpdateCase::INDICES)
4717 		{
4718 			for (auto& blas : bottomLevelAccelerationStructures)
4719 			{
4720 				const std::vector<tcu::Vec3> vertices =
4721 				{
4722 					tcu::Vec3(0.0f, 0.0f, 0.0f),
4723 					tcu::Vec3(0.5f, 0.0f, 0.0f),
4724 					tcu::Vec3(0.0f, 0.5f, 0.0f),
4725 					tcu::Vec3(0.0f, 0.0f, -0.5f),
4726 					tcu::Vec3(0.5f, 0.0f, -0.5f),
4727 					tcu::Vec3(0.0f, 0.5f, -0.5f),
4728 				};
4729 
4730 				const std::vector<deUint32> indices =
4731 				{
4732 					3,
4733 					4,
4734 					5
4735 				};
4736 				de::SharedPtr<RaytracedGeometryBase> geometry;
4737 				geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_TRIANGLES_KHR, m_data.vertexFormat, m_data.indexType);
4738 
4739 				for (auto it = begin(vertices), eit = end(vertices); it != eit; ++it)
4740 					geometry->addVertex(*it);
4741 
4742 				if (m_data.indexType != VK_INDEX_TYPE_NONE_KHR)
4743 				{
4744 					for (auto it = begin(indices), eit = end(indices); it != eit; ++it)
4745 						geometry->addIndex(*it);
4746 				}
4747 				blas->updateGeometry(0, geometry);
4748 				blas->build(vkd, device, *cmdBuffer, blas.get());
4749 			}
4750 		}
4751 		else if (m_data.updateCase == UpdateCase::TRANSFORM)
4752 		{
4753 			const VkTransformMatrixKHR translatedMatrix = { {
4754 				{ 1.0f, 0.0f, 0.0f, 0.0f },
4755 				{ 0.0f, 1.0f, 0.0f, 0.0f },
4756 				{ 0.0f, 0.0f, 1.0f, -0.5f }
4757 			} };
4758 			topLevelAccelerationStructure->updateInstanceMatrix(vkd, device, 0, translatedMatrix);
4759 			topLevelAccelerationStructure->build(vkd, device, *cmdBuffer, topLevelAccelerationStructure.get());
4760 		}
4761 
4762 		const TopLevelAccelerationStructure*			topLevelRayTracedPtr	= topLevelAccelerationStructure.get();
4763 		const VkMemoryBarrier preTraceMemoryBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
4764 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, &preTraceMemoryBarrier);
4765 
4766 		VkWriteDescriptorSetAccelerationStructureKHR	accelerationStructureWriteDescriptorSet	=
4767 		{
4768 			VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	//  VkStructureType						sType;
4769 			DE_NULL,															//  const void*							pNext;
4770 			1u,																	//  deUint32							accelerationStructureCount;
4771 			topLevelRayTracedPtr->getPtr(),										//  const VkAccelerationStructureKHR*	pAccelerationStructures;
4772 		};
4773 
4774 		DescriptorSetUpdateBuilder()
4775 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
4776 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
4777 			.update(vkd, device);
4778 
4779 		vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL);
4780 
4781 		vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
4782 
4783 		cmdTraceRays(vkd,
4784 			*cmdBuffer,
4785 			&raygenShaderBindingTableRegion,
4786 			&missShaderBindingTableRegion,
4787 			&hitShaderBindingTableRegion,
4788 			&callableShaderBindingTableRegion,
4789 			m_data.width, m_data.height, 1);
4790 
4791 		const VkMemoryBarrier				postTraceMemoryBarrier	= makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
4792 		const VkMemoryBarrier				postCopyMemoryBarrier	= makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
4793 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_TRANSFER_BIT, &postTraceMemoryBarrier);
4794 
4795 		vkd.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **resultBuffer, 1u, &resultBufferImageRegion);
4796 
4797 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &postCopyMemoryBarrier);
4798 	}
4799 	endCommandBuffer(vkd, *cmdBuffer);
4800 
4801 	submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
4802 
4803 	invalidateMappedMemoryRange(vkd, device, resultBuffer->getAllocation().getMemory(), resultBuffer->getAllocation().getOffset(), pixelCount * sizeof(deUint32));
4804 
4805 	bool result = m_data.testConfiguration->verifyImage(resultBuffer.get(), m_context, m_data);
4806 
4807 	if (result)
4808 		return tcu::TestStatus::pass("Pass");
4809 	else
4810 		return tcu::TestStatus::fail("Fail");
4811 }
4812 
4813 }	// anonymous
4814 
addBasicBuildingTests(tcu::TestCaseGroup * group)4815 void addBasicBuildingTests(tcu::TestCaseGroup* group)
4816 {
4817 	struct
4818 	{
4819 		vk::VkAccelerationStructureBuildTypeKHR	buildType;
4820 		const char*								name;
4821 	} buildTypes[] =
4822 	{
4823 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,				"cpu_built"	},
4824 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,				"gpu_built"	},
4825 	};
4826 
4827 	struct
4828 	{
4829 		BottomTestType							testType;
4830 		bool									usesAOP;
4831 		const char*								name;
4832 	} bottomTestTypes[] =
4833 	{
4834 		{ BottomTestType::TRIANGLES,	false,							"triangles" },
4835 		{ BottomTestType::TRIANGLES,	true,							"triangles_aop" },
4836 		{ BottomTestType::AABBS,		false,							"aabbs" },
4837 		{ BottomTestType::AABBS,		true,							"aabbs_aop" },
4838 	};
4839 
4840 	struct
4841 	{
4842 		TopTestType								testType;
4843 		bool									usesAOP;
4844 		const char*								name;
4845 	} topTestTypes[] =
4846 	{
4847 		{ TopTestType::IDENTICAL_INSTANCES,	false,						"identical_instances" },
4848 		{ TopTestType::IDENTICAL_INSTANCES,	true,						"identical_instances_aop" },
4849 		{ TopTestType::DIFFERENT_INSTANCES,	false,						"different_instances" },
4850 		{ TopTestType::DIFFERENT_INSTANCES,	true,						"different_instances_aop" },
4851 	};
4852 
4853 	struct BuildFlagsData
4854 	{
4855 		VkBuildAccelerationStructureFlagsKHR	flags;
4856 		const char*								name;
4857 	};
4858 
4859 	BuildFlagsData optimizationTypes[] =
4860 	{
4861 		{ VkBuildAccelerationStructureFlagsKHR(0u),						"0" },
4862 		{ VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR,	"fasttrace" },
4863 		{ VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR,	"fastbuild" },
4864 	};
4865 
4866 	BuildFlagsData updateTypes[] =
4867 	{
4868 		{ VkBuildAccelerationStructureFlagsKHR(0u),						"0" },
4869 		{ VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR,			"update" },
4870 	};
4871 
4872 	BuildFlagsData compactionTypes[] =
4873 	{
4874 		{ VkBuildAccelerationStructureFlagsKHR(0u),						"0" },
4875 		{ VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR,		"compaction" },
4876 	};
4877 
4878 	BuildFlagsData lowMemoryTypes[] =
4879 	{
4880 		{ VkBuildAccelerationStructureFlagsKHR(0u),						"0" },
4881 		{ VK_BUILD_ACCELERATION_STRUCTURE_LOW_MEMORY_BIT_KHR,			"lowmemory" },
4882 	};
4883 
4884 	struct
4885 	{
4886 		bool		padVertices;
4887 		const char*	name;
4888 	} paddingType[] =
4889 	{
4890 		{ false,	"nopadding"	},
4891 		{ true,		"padded"	},
4892 	};
4893 
4894 	struct
4895 	{
4896 		bool		topGeneric;
4897 		bool		bottomGeneric;
4898 		const char*	suffix;
4899 	} createGenericParams[] =
4900 	{
4901 		{	false,	false,	""					},
4902 		{	false,	true,	"_bottomgeneric"	},
4903 		{	true,	false,	"_topgeneric"		},
4904 		{	true,	true,	"_bothgeneric"		},
4905 	};
4906 
4907 	// In order not to create thousands of new test variants for unbound buffer memory on acceleration structure creation, we will
4908 	// set these options on some of the tests.
4909 	de::ModCounter32 unboundedCreationBottomCounter	(3u);
4910 	de::ModCounter32 unboundedCreationTopCounter	(7u);
4911 
4912 	for (size_t buildTypeNdx = 0; buildTypeNdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeNdx)
4913 	{
4914 		de::MovePtr<tcu::TestCaseGroup> buildGroup(new tcu::TestCaseGroup(group->getTestContext(), buildTypes[buildTypeNdx].name));
4915 
4916 		for (size_t bottomNdx = 0; bottomNdx < DE_LENGTH_OF_ARRAY(bottomTestTypes); ++bottomNdx)
4917 		{
4918 			de::MovePtr<tcu::TestCaseGroup> bottomGroup(new tcu::TestCaseGroup(group->getTestContext(), bottomTestTypes[bottomNdx].name));
4919 
4920 			for (size_t topNdx = 0; topNdx < DE_LENGTH_OF_ARRAY(topTestTypes); ++topNdx)
4921 			{
4922 				de::MovePtr<tcu::TestCaseGroup> topGroup(new tcu::TestCaseGroup(group->getTestContext(), topTestTypes[topNdx].name));
4923 
4924 				for (int paddingTypeIdx = 0; paddingTypeIdx < DE_LENGTH_OF_ARRAY(paddingType); ++paddingTypeIdx)
4925 				{
4926 					de::MovePtr<tcu::TestCaseGroup> paddingGroup(new tcu::TestCaseGroup(group->getTestContext(), paddingType[paddingTypeIdx].name));
4927 
4928 					for (size_t optimizationNdx = 0; optimizationNdx < DE_LENGTH_OF_ARRAY(optimizationTypes); ++optimizationNdx)
4929 					{
4930 						for (size_t updateNdx = 0; updateNdx < DE_LENGTH_OF_ARRAY(updateTypes); ++updateNdx)
4931 						{
4932 							for (size_t compactionNdx = 0; compactionNdx < DE_LENGTH_OF_ARRAY(compactionTypes); ++compactionNdx)
4933 							{
4934 								for (size_t lowMemoryNdx = 0; lowMemoryNdx < DE_LENGTH_OF_ARRAY(lowMemoryTypes); ++lowMemoryNdx)
4935 								{
4936 									for (int createGenericIdx = 0; createGenericIdx < DE_LENGTH_OF_ARRAY(createGenericParams); ++createGenericIdx)
4937 									{
4938 										std::string testName =
4939 											std::string(optimizationTypes[optimizationNdx].name) + "_" +
4940 											std::string(updateTypes[updateNdx].name) + "_" +
4941 											std::string(compactionTypes[compactionNdx].name) + "_" +
4942 											std::string(lowMemoryTypes[lowMemoryNdx].name) +
4943 											std::string(createGenericParams[createGenericIdx].suffix);
4944 
4945 										const bool unboundedCreationBottom	= (static_cast<uint32_t>(unboundedCreationBottomCounter++) == 0u);
4946 										const bool unboundedCreationTop		= (static_cast<uint32_t>(unboundedCreationTopCounter++) == 0u);
4947 
4948 										TestParams testParams
4949 										{
4950 											buildTypes[buildTypeNdx].buildType,
4951 											VK_FORMAT_R32G32B32_SFLOAT,
4952 											paddingType[paddingTypeIdx].padVertices,
4953 											VK_INDEX_TYPE_NONE_KHR,
4954 											bottomTestTypes[bottomNdx].testType,
4955 											InstanceCullFlags::NONE,
4956 											bottomTestTypes[bottomNdx].usesAOP,
4957 											createGenericParams[createGenericIdx].bottomGeneric,
4958 											unboundedCreationBottom,
4959 											topTestTypes[topNdx].testType,
4960 											topTestTypes[topNdx].usesAOP,
4961 											createGenericParams[createGenericIdx].topGeneric,
4962 											unboundedCreationTop,
4963 											optimizationTypes[optimizationNdx].flags | updateTypes[updateNdx].flags | compactionTypes[compactionNdx].flags | lowMemoryTypes[lowMemoryNdx].flags,
4964 											OT_NONE,
4965 											OP_NONE,
4966 											RTAS_DEFAULT_SIZE,
4967 											RTAS_DEFAULT_SIZE,
4968 											de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()),
4969 											0u,
4970 											EmptyAccelerationStructureCase::NOT_EMPTY,
4971 											InstanceCustomIndexCase::NONE,
4972 											false,
4973 											0xFFu,
4974 											UpdateCase::NONE,
4975 										};
4976 										paddingGroup->addChild(new RayTracingASBasicTestCase(group->getTestContext(), testName.c_str(), testParams));
4977 									}
4978 								}
4979 							}
4980 						}
4981 					}
4982 					topGroup->addChild(paddingGroup.release());
4983 				}
4984 				bottomGroup->addChild(topGroup.release());
4985 			}
4986 			buildGroup->addChild(bottomGroup.release());
4987 		}
4988 		group->addChild(buildGroup.release());
4989 	}
4990 }
4991 
addVertexIndexFormatsTests(tcu::TestCaseGroup * group)4992 void addVertexIndexFormatsTests(tcu::TestCaseGroup* group)
4993 {
4994 	struct
4995 	{
4996 		vk::VkAccelerationStructureBuildTypeKHR				buildType;
4997 		const char*											name;
4998 	} buildTypes[] =
4999 	{
5000 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
5001 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
5002 	};
5003 
5004 	const VkFormat vertexFormats[] =
5005 	{
5006 		// Mandatory formats.
5007 		VK_FORMAT_R32G32_SFLOAT,
5008 		VK_FORMAT_R32G32B32_SFLOAT,
5009 		VK_FORMAT_R16G16_SFLOAT,
5010 		VK_FORMAT_R16G16B16A16_SFLOAT,
5011 		VK_FORMAT_R16G16_SNORM,
5012 		VK_FORMAT_R16G16B16A16_SNORM,
5013 
5014 		// Additional formats.
5015 		VK_FORMAT_R8G8_SNORM,
5016 		VK_FORMAT_R8G8B8_SNORM,
5017 		VK_FORMAT_R8G8B8A8_SNORM,
5018 		VK_FORMAT_R16G16B16_SNORM,
5019 		VK_FORMAT_R16G16B16_SFLOAT,
5020 		VK_FORMAT_R32G32B32A32_SFLOAT,
5021 		VK_FORMAT_R64G64_SFLOAT,
5022 		VK_FORMAT_R64G64B64_SFLOAT,
5023 		VK_FORMAT_R64G64B64A64_SFLOAT,
5024 	};
5025 
5026 	struct
5027 	{
5028 		VkIndexType								indexType;
5029 		const char*								name;
5030 	} indexFormats[] =
5031 	{
5032 		{ VK_INDEX_TYPE_NONE_KHR ,				"index_none"		},
5033 		{ VK_INDEX_TYPE_UINT16 ,				"index_uint16"	},
5034 		{ VK_INDEX_TYPE_UINT32 ,				"index_uint32"	},
5035 	};
5036 
5037 	struct
5038 	{
5039 		bool		padVertices;
5040 		const char*	name;
5041 	} paddingType[] =
5042 	{
5043 		{ false,	"nopadding"	},
5044 		{ true,		"padded"	},
5045 	};
5046 
5047 	for (size_t buildTypeNdx = 0; buildTypeNdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeNdx)
5048 	{
5049 		de::MovePtr<tcu::TestCaseGroup> buildGroup(new tcu::TestCaseGroup(group->getTestContext(), buildTypes[buildTypeNdx].name));
5050 
5051 		for (size_t vertexFormatNdx = 0; vertexFormatNdx < DE_LENGTH_OF_ARRAY(vertexFormats); ++vertexFormatNdx)
5052 		{
5053 			const auto format		= vertexFormats[vertexFormatNdx];
5054 			const auto formatName	= getFormatSimpleName(format);
5055 
5056 			de::MovePtr<tcu::TestCaseGroup> vertexFormatGroup(new tcu::TestCaseGroup(group->getTestContext(), formatName.c_str()));
5057 
5058 			for (int paddingIdx = 0; paddingIdx < DE_LENGTH_OF_ARRAY(paddingType); ++paddingIdx)
5059 			{
5060 				de::MovePtr<tcu::TestCaseGroup> paddingGroup(new tcu::TestCaseGroup(group->getTestContext(), paddingType[paddingIdx].name));
5061 
5062 				for (size_t indexFormatNdx = 0; indexFormatNdx < DE_LENGTH_OF_ARRAY(indexFormats); ++indexFormatNdx)
5063 				{
5064 					TestParams testParams
5065 					{
5066 						buildTypes[buildTypeNdx].buildType,
5067 						format,
5068 						paddingType[paddingIdx].padVertices,
5069 						indexFormats[indexFormatNdx].indexType,
5070 						BottomTestType::TRIANGLES,
5071 						InstanceCullFlags::NONE,
5072 						false,
5073 						false,
5074 						false,
5075 						TopTestType::IDENTICAL_INSTANCES,
5076 						false,
5077 						false,
5078 						false,
5079 						VkBuildAccelerationStructureFlagsKHR(0u),
5080 						OT_NONE,
5081 						OP_NONE,
5082 						RTAS_DEFAULT_SIZE,
5083 						RTAS_DEFAULT_SIZE,
5084 						de::SharedPtr<TestConfiguration>(new SingleTriangleConfiguration()),
5085 						0u,
5086 						EmptyAccelerationStructureCase::NOT_EMPTY,
5087 						InstanceCustomIndexCase::NONE,
5088 						false,
5089 						0xFFu,
5090 						UpdateCase::NONE,
5091 					};
5092 					paddingGroup->addChild(new RayTracingASBasicTestCase(group->getTestContext(), indexFormats[indexFormatNdx].name, testParams));
5093 				}
5094 				vertexFormatGroup->addChild(paddingGroup.release());
5095 			}
5096 			buildGroup->addChild(vertexFormatGroup.release());
5097 		}
5098 		group->addChild(buildGroup.release());
5099 	}
5100 }
5101 
addOperationTestsImpl(tcu::TestCaseGroup * group,const deUint32 workerThreads)5102 void addOperationTestsImpl (tcu::TestCaseGroup* group, const deUint32 workerThreads)
5103 {
5104 	struct
5105 	{
5106 		OperationType										operationType;
5107 		const char*											name;
5108 	} operationTypes[] =
5109 	{
5110 		{ OP_COPY,											"copy"			},
5111 		{ OP_COMPACT,										"compaction"	},
5112 		{ OP_SERIALIZE,										"serialization"	},
5113 	};
5114 
5115 	struct
5116 	{
5117 		vk::VkAccelerationStructureBuildTypeKHR				buildType;
5118 		const char*											name;
5119 	} buildTypes[] =
5120 	{
5121 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
5122 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
5123 	};
5124 
5125 	struct
5126 	{
5127 		OperationTarget										operationTarget;
5128 		const char*											name;
5129 	} operationTargets[] =
5130 	{
5131 		{ OT_TOP_ACCELERATION,								"top_acceleration_structure"		},
5132 		{ OT_BOTTOM_ACCELERATION,							"bottom_acceleration_structure"	},
5133 	};
5134 
5135 	struct
5136 	{
5137 		BottomTestType										testType;
5138 		const char*											name;
5139 	} bottomTestTypes[] =
5140 	{
5141 		{ BottomTestType::TRIANGLES,						"triangles" },
5142 		{ BottomTestType::AABBS,							"aabbs" },
5143 	};
5144 
5145 	for (size_t operationTypeNdx = 0; operationTypeNdx < DE_LENGTH_OF_ARRAY(operationTypes); ++operationTypeNdx)
5146 	{
5147 		if (workerThreads > 0)
5148 			if (operationTypes[operationTypeNdx].operationType != OP_COPY && operationTypes[operationTypeNdx].operationType != OP_SERIALIZE)
5149 				continue;
5150 
5151 		de::MovePtr<tcu::TestCaseGroup> operationTypeGroup(new tcu::TestCaseGroup(group->getTestContext(), operationTypes[operationTypeNdx].name));
5152 
5153 		for (size_t buildTypeNdx = 0; buildTypeNdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeNdx)
5154 		{
5155 			if (workerThreads > 0 && buildTypes[buildTypeNdx].buildType != VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR)
5156 				continue;
5157 
5158 			de::MovePtr<tcu::TestCaseGroup> buildGroup(new tcu::TestCaseGroup(group->getTestContext(), buildTypes[buildTypeNdx].name));
5159 
5160 			for (size_t operationTargetNdx = 0; operationTargetNdx < DE_LENGTH_OF_ARRAY(operationTargets); ++operationTargetNdx)
5161 			{
5162 				de::MovePtr<tcu::TestCaseGroup> operationTargetGroup(new tcu::TestCaseGroup(group->getTestContext(), operationTargets[operationTargetNdx].name));
5163 
5164 				for (size_t testTypeNdx = 0; testTypeNdx < DE_LENGTH_OF_ARRAY(bottomTestTypes); ++testTypeNdx)
5165 				{
5166 					TopTestType topTest = (operationTargets[operationTargetNdx].operationTarget == OT_TOP_ACCELERATION) ? TopTestType::DIFFERENT_INSTANCES : TopTestType::IDENTICAL_INSTANCES;
5167 
5168 					TestParams testParams
5169 					{
5170 						buildTypes[buildTypeNdx].buildType,
5171 						VK_FORMAT_R32G32B32_SFLOAT,
5172 						false,
5173 						VK_INDEX_TYPE_NONE_KHR,
5174 						bottomTestTypes[testTypeNdx].testType,
5175 						InstanceCullFlags::NONE,
5176 						false,
5177 						false,
5178 						false,
5179 						topTest,
5180 						false,
5181 						false,
5182 						false,
5183 						VkBuildAccelerationStructureFlagsKHR(0u),
5184 						operationTargets[operationTargetNdx].operationTarget,
5185 						operationTypes[operationTypeNdx].operationType,
5186 						RTAS_DEFAULT_SIZE,
5187 						RTAS_DEFAULT_SIZE,
5188 						de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()),
5189 						workerThreads,
5190 						EmptyAccelerationStructureCase::NOT_EMPTY,
5191 						InstanceCustomIndexCase::NONE,
5192 						false,
5193 						0xFFu,
5194 						UpdateCase::NONE,
5195 					};
5196 					operationTargetGroup->addChild(new RayTracingASBasicTestCase(group->getTestContext(), bottomTestTypes[testTypeNdx].name, testParams));
5197 				}
5198 				buildGroup->addChild(operationTargetGroup.release());
5199 			}
5200 			operationTypeGroup->addChild(buildGroup.release());
5201 		}
5202 		group->addChild(operationTypeGroup.release());
5203 	}
5204 }
5205 
addOperationTests(tcu::TestCaseGroup * group)5206 void addOperationTests (tcu::TestCaseGroup* group)
5207 {
5208 	addOperationTestsImpl(group, 0);
5209 }
5210 
addHostThreadingOperationTests(tcu::TestCaseGroup * group)5211 void addHostThreadingOperationTests (tcu::TestCaseGroup* group)
5212 {
5213 	const deUint32	threads[]	= { 1, 2, 3, 4, 8, std::numeric_limits<deUint32>::max() };
5214 
5215 	for (size_t threadsNdx = 0; threadsNdx < DE_LENGTH_OF_ARRAY(threads); ++threadsNdx)
5216 	{
5217 		const std::string groupName = threads[threadsNdx] != std::numeric_limits<deUint32>::max()
5218 									? de::toString(threads[threadsNdx])
5219 									: "max";
5220 
5221 		de::MovePtr<tcu::TestCaseGroup> threadGroup(new tcu::TestCaseGroup(group->getTestContext(), groupName.c_str()));
5222 
5223 		addOperationTestsImpl(threadGroup.get(), threads[threadsNdx]);
5224 
5225 		group->addChild(threadGroup.release());
5226 	}
5227 }
5228 
addFuncArgTests(tcu::TestCaseGroup * group)5229 void addFuncArgTests (tcu::TestCaseGroup* group)
5230 {
5231 	const struct
5232 	{
5233 		vk::VkAccelerationStructureBuildTypeKHR				buildType;
5234 		const char*											name;
5235 	} buildTypes[] =
5236 	{
5237 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
5238 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
5239 	};
5240 
5241 	auto& ctx = group->getTestContext();
5242 
5243 	for (int buildTypeNdx = 0; buildTypeNdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeNdx)
5244 	{
5245 		TestParams testParams
5246 		{
5247 			buildTypes[buildTypeNdx].buildType,
5248 			VK_FORMAT_R32G32B32_SFLOAT,
5249 			false,
5250 			VK_INDEX_TYPE_NONE_KHR,
5251 			BottomTestType::TRIANGLES,
5252 			InstanceCullFlags::NONE,
5253 			false,
5254 			false,
5255 			false,
5256 			TopTestType::IDENTICAL_INSTANCES,
5257 			false,
5258 			false,
5259 			false,
5260 			VkBuildAccelerationStructureFlagsKHR(0u),
5261 			OT_NONE,
5262 			OP_NONE,
5263 			RTAS_DEFAULT_SIZE,
5264 			RTAS_DEFAULT_SIZE,
5265 			de::SharedPtr<TestConfiguration>(new SingleTriangleConfiguration()),
5266 			0u,
5267 			EmptyAccelerationStructureCase::NOT_EMPTY,
5268 			InstanceCustomIndexCase::NONE,
5269 			false,
5270 			0xFFu,
5271 			UpdateCase::NONE,
5272 		};
5273 
5274 		group->addChild(new RayTracingASFuncArgTestCase(ctx, buildTypes[buildTypeNdx].name, testParams));
5275 	}
5276 }
5277 
addInstanceTriangleCullingTests(tcu::TestCaseGroup * group)5278 void addInstanceTriangleCullingTests (tcu::TestCaseGroup* group)
5279 {
5280 	const struct
5281 	{
5282 		InstanceCullFlags	cullFlags;
5283 		std::string			name;
5284 	} cullFlags[] =
5285 	{
5286 		{ InstanceCullFlags::NONE,				"noflags"		},
5287 		{ InstanceCullFlags::COUNTERCLOCKWISE,	"ccw"			},
5288 		{ InstanceCullFlags::CULL_DISABLE,		"nocull"		},
5289 		{ InstanceCullFlags::ALL,				"ccw_nocull"	},
5290 	};
5291 
5292 	const struct
5293 	{
5294 		TopTestType	topType;
5295 		std::string	name;
5296 	} topType[] =
5297 	{
5298 		{ TopTestType::DIFFERENT_INSTANCES, "transformed"	},	// Each instance has its own transformation matrix.
5299 		{ TopTestType::IDENTICAL_INSTANCES, "notransform"	},	// "Identical" instances, different geometries.
5300 	};
5301 
5302 	const struct
5303 	{
5304 		vk::VkAccelerationStructureBuildTypeKHR	buildType;
5305 		std::string								name;
5306 	} buildTypes[] =
5307 	{
5308 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
5309 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
5310 	};
5311 
5312 	const struct
5313 	{
5314 		VkIndexType	indexType;
5315 		std::string	name;
5316 	} indexFormats[] =
5317 	{
5318 		{ VK_INDEX_TYPE_NONE_KHR ,	"index_none"	},
5319 		{ VK_INDEX_TYPE_UINT16 ,	"index_uint16"	},
5320 		{ VK_INDEX_TYPE_UINT32 ,	"index_uint32"	},
5321 	};
5322 
5323 	auto& ctx = group->getTestContext();
5324 
5325 	for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
5326 	{
5327 		de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str()));
5328 
5329 		for (int indexFormatIdx = 0; indexFormatIdx < DE_LENGTH_OF_ARRAY(indexFormats); ++indexFormatIdx)
5330 		{
5331 			de::MovePtr<tcu::TestCaseGroup> indexTypeGroup(new tcu::TestCaseGroup(ctx, indexFormats[indexFormatIdx].name.c_str()));
5332 
5333 			for (int topTypeIdx = 0; topTypeIdx < DE_LENGTH_OF_ARRAY(topType); ++topTypeIdx)
5334 			{
5335 				for (int cullFlagsIdx = 0; cullFlagsIdx < DE_LENGTH_OF_ARRAY(cullFlags); ++cullFlagsIdx)
5336 				{
5337 					const std::string testName = topType[topTypeIdx].name + "_" + cullFlags[cullFlagsIdx].name;
5338 
5339 					TestParams testParams
5340 					{
5341 						buildTypes[buildTypeIdx].buildType,
5342 						VK_FORMAT_R32G32B32_SFLOAT,
5343 						false,
5344 						indexFormats[indexFormatIdx].indexType,
5345 						BottomTestType::TRIANGLES,
5346 						cullFlags[cullFlagsIdx].cullFlags,
5347 						false,
5348 						false,
5349 						false,
5350 						topType[topTypeIdx].topType,
5351 						false,
5352 						false,
5353 						false,
5354 						VkBuildAccelerationStructureFlagsKHR(0u),
5355 						OT_NONE,
5356 						OP_NONE,
5357 						RTAS_DEFAULT_SIZE,
5358 						RTAS_DEFAULT_SIZE,
5359 						de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()),
5360 						0u,
5361 						EmptyAccelerationStructureCase::NOT_EMPTY,
5362 						InstanceCustomIndexCase::NONE,
5363 						false,
5364 						0xFFu,
5365 						UpdateCase::NONE,
5366 					};
5367 					indexTypeGroup->addChild(new RayTracingASBasicTestCase(ctx, testName.c_str(), testParams));
5368 				}
5369 			}
5370 			buildTypeGroup->addChild(indexTypeGroup.release());
5371 		}
5372 		group->addChild(buildTypeGroup.release());
5373 	}
5374 }
5375 
addDynamicIndexingTests(tcu::TestCaseGroup * group)5376 void addDynamicIndexingTests (tcu::TestCaseGroup* group)
5377 {
5378 	auto& ctx = group->getTestContext();
5379 	group->addChild(new RayTracingASDynamicIndexingTestCase(ctx, "dynamic_indexing"));
5380 }
5381 
addEmptyAccelerationStructureTests(tcu::TestCaseGroup * group)5382 void addEmptyAccelerationStructureTests (tcu::TestCaseGroup* group)
5383 {
5384 	const struct
5385 	{
5386 		vk::VkAccelerationStructureBuildTypeKHR				buildType;
5387 		std::string											name;
5388 	} buildTypes[] =
5389 	{
5390 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
5391 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
5392 	};
5393 
5394 	const struct
5395 	{
5396 		VkIndexType								indexType;
5397 		std::string								name;
5398 	} indexFormats[] =
5399 	{
5400 		{ VK_INDEX_TYPE_NONE_KHR,				"index_none"	},
5401 		{ VK_INDEX_TYPE_UINT16,					"index_uint16"	},
5402 		{ VK_INDEX_TYPE_UINT32,					"index_uint32"	},
5403 	};
5404 
5405 	const struct
5406 	{
5407 		EmptyAccelerationStructureCase	emptyASCase;
5408 		std::string						name;
5409 	} emptyCases[] =
5410 	{
5411 		{ EmptyAccelerationStructureCase::INACTIVE_TRIANGLES,	"inactive_triangles"	},
5412 		{ EmptyAccelerationStructureCase::INACTIVE_INSTANCES,	"inactive_instances"	},
5413 		{ EmptyAccelerationStructureCase::NO_GEOMETRIES_BOTTOM,	"no_geometries_bottom"	},
5414 		{ EmptyAccelerationStructureCase::NO_PRIMITIVES_TOP,	"no_primitives_top"		},
5415 		{ EmptyAccelerationStructureCase::NO_PRIMITIVES_BOTTOM,	"no_primitives_bottom"	},
5416 	};
5417 
5418 	auto& ctx = group->getTestContext();
5419 
5420 	for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
5421 	{
5422 		de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str()));
5423 
5424 		for (int indexFormatIdx = 0; indexFormatIdx < DE_LENGTH_OF_ARRAY(indexFormats); ++indexFormatIdx)
5425 		{
5426 			de::MovePtr<tcu::TestCaseGroup> indexTypeGroup(new tcu::TestCaseGroup(ctx, indexFormats[indexFormatIdx].name.c_str()));
5427 
5428 			for (int emptyCaseIdx = 0; emptyCaseIdx < DE_LENGTH_OF_ARRAY(emptyCases); ++emptyCaseIdx)
5429 			{
5430 
5431 				TestParams testParams
5432 				{
5433 					buildTypes[buildTypeIdx].buildType,
5434 					VK_FORMAT_R32G32B32_SFLOAT,
5435 					false,
5436 					indexFormats[indexFormatIdx].indexType,
5437 					BottomTestType::TRIANGLES,
5438 					InstanceCullFlags::NONE,
5439 					false,
5440 					false,
5441 					false,
5442 					TopTestType::IDENTICAL_INSTANCES,
5443 					false,
5444 					false,
5445 					false,
5446 					VkBuildAccelerationStructureFlagsKHR(0u),
5447 					OT_NONE,
5448 					OP_NONE,
5449 					RTAS_DEFAULT_SIZE,
5450 					RTAS_DEFAULT_SIZE,
5451 					de::SharedPtr<TestConfiguration>(new SingleTriangleConfiguration()),
5452 					0u,
5453 					emptyCases[emptyCaseIdx].emptyASCase,
5454 					InstanceCustomIndexCase::NONE,
5455 					false,
5456 					0xFFu,
5457 					UpdateCase::NONE,
5458 				};
5459 				indexTypeGroup->addChild(new RayTracingASBasicTestCase(ctx, emptyCases[emptyCaseIdx].name.c_str(), testParams));
5460 			}
5461 			buildTypeGroup->addChild(indexTypeGroup.release());
5462 		}
5463 		group->addChild(buildTypeGroup.release());
5464 	}
5465 }
5466 
addInstanceIndexTests(tcu::TestCaseGroup * group)5467 void addInstanceIndexTests (tcu::TestCaseGroup* group)
5468 {
5469 	const struct
5470 	{
5471 		vk::VkAccelerationStructureBuildTypeKHR				buildType;
5472 		std::string											name;
5473 	} buildTypes[] =
5474 	{
5475 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
5476 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
5477 	};
5478 
5479 	const struct
5480 	{
5481 		InstanceCustomIndexCase						customIndexCase;
5482 		std::string									name;
5483 	} customIndexCases[] =
5484 	{
5485 		{ InstanceCustomIndexCase::NONE,			"no_instance_index"	},
5486 		{ InstanceCustomIndexCase::ANY_HIT,			"ahit"				},
5487 		{ InstanceCustomIndexCase::CLOSEST_HIT,		"chit"				},
5488 		{ InstanceCustomIndexCase::INTERSECTION,	"isec"				},
5489 	};
5490 
5491 	auto& ctx = group->getTestContext();
5492 
5493 	for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
5494 	{
5495 		de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str()));
5496 
5497 		for (int customIndexCaseIdx = 0; customIndexCaseIdx < DE_LENGTH_OF_ARRAY(customIndexCases); ++customIndexCaseIdx)
5498 		{
5499 			const auto&	idxCase				= customIndexCases[customIndexCaseIdx].customIndexCase;
5500 			const auto	bottomGeometryType	= ((idxCase == InstanceCustomIndexCase::INTERSECTION) ? BottomTestType::AABBS : BottomTestType::TRIANGLES);
5501 
5502 			TestParams testParams
5503 			{
5504 				buildTypes[buildTypeIdx].buildType,
5505 				VK_FORMAT_R32G32B32_SFLOAT,
5506 				false,
5507 				VK_INDEX_TYPE_NONE_KHR,
5508 				bottomGeometryType,
5509 				InstanceCullFlags::NONE,
5510 				false,
5511 				false,
5512 				false,
5513 				TopTestType::IDENTICAL_INSTANCES,
5514 				false,
5515 				false,
5516 				false,
5517 				VkBuildAccelerationStructureFlagsKHR(0u),
5518 				OT_NONE,
5519 				OP_NONE,
5520 				RTAS_DEFAULT_SIZE,
5521 				RTAS_DEFAULT_SIZE,
5522 				de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()),
5523 				0u,
5524 				EmptyAccelerationStructureCase::NOT_EMPTY,
5525 				customIndexCases[customIndexCaseIdx].customIndexCase,
5526 				false,
5527 				0xFFu,
5528 				UpdateCase::NONE,
5529 			};
5530 			buildTypeGroup->addChild(new RayTracingASBasicTestCase(ctx, customIndexCases[customIndexCaseIdx].name.c_str(), testParams));
5531 		}
5532 		group->addChild(buildTypeGroup.release());
5533 	}
5534 }
5535 
addInstanceUpdateTests(tcu::TestCaseGroup * group)5536 void addInstanceUpdateTests (tcu::TestCaseGroup* group)
5537 {
5538 	const struct
5539 	{
5540 		vk::VkAccelerationStructureBuildTypeKHR				buildType;
5541 		std::string											name;
5542 	} buildTypes[] =
5543 	{
5544 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
5545 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
5546 	};
5547 
5548 	struct
5549 	{
5550 		OperationType										operationType;
5551 		const char*											name;
5552 	} operationTypes[] =
5553 	{
5554 		{ OP_UPDATE,											"update"			},
5555 		{ OP_UPDATE_IN_PLACE,									"update_in_place"	},
5556 	};
5557 
5558 
5559 	auto& ctx = group->getTestContext();
5560 
5561 	for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
5562 	{
5563 		de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str()));
5564 
5565 		for (int operationTypesIdx = 0; operationTypesIdx < DE_LENGTH_OF_ARRAY(operationTypes); ++operationTypesIdx)
5566 		{
5567 			TestParams testParams
5568 			{
5569 				buildTypes[buildTypeIdx].buildType,
5570 				VK_FORMAT_R32G32B32_SFLOAT,
5571 				false,
5572 				VK_INDEX_TYPE_NONE_KHR,
5573 				BottomTestType::TRIANGLES,
5574 				InstanceCullFlags::NONE,
5575 				false,
5576 				false,
5577 				false,
5578 				TopTestType::IDENTICAL_INSTANCES,
5579 				false,
5580 				false,
5581 				false,
5582 				VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR,
5583 				OT_TOP_ACCELERATION,
5584 				operationTypes[operationTypesIdx].operationType,
5585 				RTAS_DEFAULT_SIZE,
5586 				RTAS_DEFAULT_SIZE,
5587 				de::SharedPtr<TestConfiguration>(new SingleTriangleConfiguration()),
5588 				0u,
5589 				EmptyAccelerationStructureCase::NOT_EMPTY,
5590 				InstanceCustomIndexCase::NONE,
5591 				false,
5592 				0xFFu,
5593 				UpdateCase::NONE,
5594 			};
5595 			buildTypeGroup->addChild(new RayTracingASBasicTestCase(ctx, operationTypes[operationTypesIdx].name, testParams));
5596 		}
5597 		group->addChild(buildTypeGroup.release());
5598 	}
5599 }
5600 
addInstanceRayCullMaskTests(tcu::TestCaseGroup * group)5601 void addInstanceRayCullMaskTests(tcu::TestCaseGroup* group)
5602 {
5603 	const struct
5604 	{
5605 		vk::VkAccelerationStructureBuildTypeKHR				buildType;
5606 		std::string											name;
5607 	} buildTypes[] =
5608 	{
5609 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
5610 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
5611 	};
5612 
5613 	const struct
5614 	{
5615 		InstanceCustomIndexCase						customIndexCase;
5616 		std::string									name;
5617 	} customIndexCases[] =
5618 	{
5619 		{ InstanceCustomIndexCase::ANY_HIT,			"ahit"				},
5620 		{ InstanceCustomIndexCase::CLOSEST_HIT,		"chit"				},
5621 		{ InstanceCustomIndexCase::INTERSECTION,	"isec"				},
5622 	};
5623 
5624 	const struct
5625 	{
5626 		uint32_t		cullMask;
5627 		std::string		name;
5628 	} cullMask[] =
5629 	{
5630 		{ 0x000000AAu,	"4_bits"},
5631 		{ 0x00000055u,	"4_bits_reverse"},
5632 		{ 0xAAAAAAAAu,	"16_bits"},
5633 		{ 0x55555555u,	"16_bits_reverse"},
5634 	};
5635 
5636 	auto& ctx = group->getTestContext();
5637 
5638 	for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
5639 	{
5640 		de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str()));
5641 
5642 		for (int customIndexCaseIdx = 0; customIndexCaseIdx < DE_LENGTH_OF_ARRAY(customIndexCases); ++customIndexCaseIdx)
5643 		{
5644 			de::MovePtr<tcu::TestCaseGroup> customIndexCaseGroup(new tcu::TestCaseGroup(ctx, customIndexCases[customIndexCaseIdx].name.c_str()));
5645 
5646 			for (int cullMaskIdx = 0; cullMaskIdx < DE_LENGTH_OF_ARRAY(cullMask); ++cullMaskIdx)
5647 			{
5648 				const auto& idxCase = customIndexCases[customIndexCaseIdx].customIndexCase;
5649 				const auto	bottomGeometryType = ((idxCase == InstanceCustomIndexCase::INTERSECTION) ? BottomTestType::AABBS : BottomTestType::TRIANGLES);
5650 
5651 				TestParams testParams
5652 				{
5653 					buildTypes[buildTypeIdx].buildType,
5654 					VK_FORMAT_R32G32B32_SFLOAT,
5655 					false,
5656 					VK_INDEX_TYPE_NONE_KHR,
5657 					bottomGeometryType,
5658 					InstanceCullFlags::NONE,
5659 					false,
5660 					false,
5661 					false,
5662 					TopTestType::IDENTICAL_INSTANCES,
5663 					false,
5664 					false,
5665 					false,
5666 					VkBuildAccelerationStructureFlagsKHR(0u),
5667 					OT_NONE,
5668 					OP_NONE,
5669 					RTAS_DEFAULT_SIZE,
5670 					RTAS_DEFAULT_SIZE,
5671 					de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()),
5672 					0u,
5673 					EmptyAccelerationStructureCase::NOT_EMPTY,
5674 					customIndexCases[customIndexCaseIdx].customIndexCase,
5675 					true,
5676 					cullMask[cullMaskIdx].cullMask,
5677 					UpdateCase::NONE,
5678 				};
5679 				customIndexCaseGroup->addChild(new RayTracingASBasicTestCase(ctx,  cullMask[cullMaskIdx].name.c_str(), testParams));
5680 			}
5681 			buildTypeGroup->addChild(customIndexCaseGroup.release());
5682 		}
5683 		group->addChild(buildTypeGroup.release());
5684 	}
5685 }
5686 
5687 
addGetDeviceAccelerationStructureCompabilityTests(tcu::TestCaseGroup * group)5688 void addGetDeviceAccelerationStructureCompabilityTests (tcu::TestCaseGroup* group)
5689 {
5690 	struct
5691 	{
5692 		vk::VkAccelerationStructureBuildTypeKHR				buildType;
5693 		std::string											name;
5694 	}
5695 	const buildTypes[] =
5696 	{
5697 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
5698 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
5699 	};
5700 
5701 	struct
5702 	{
5703 		OperationTarget	target;
5704 		std::string		name;
5705 	}
5706 	const targets[] =
5707 	{
5708 		{ OT_TOP_ACCELERATION,		"top" },
5709 		{ OT_BOTTOM_ACCELERATION,	"bottom" },
5710 	};
5711 
5712 	auto& ctx = group->getTestContext();
5713 
5714 	for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
5715 	{
5716 		de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str()));
5717 
5718 		for (int targetIdx = 0; targetIdx < DE_LENGTH_OF_ARRAY(targets); ++targetIdx)
5719 		{
5720 			TestParams testParams
5721 			{
5722 				buildTypes[buildTypeIdx].buildType,									// buildType		- are we making AS on CPU or GPU
5723 				VK_FORMAT_R32G32B32_SFLOAT,											// vertexFormat
5724 				false,																// padVertices
5725 				VK_INDEX_TYPE_NONE_KHR,												// indexType
5726 				BottomTestType::TRIANGLES,											// bottomTestType	- what kind of geometry is stored in bottom AS
5727 				InstanceCullFlags::NONE,											// cullFlags		- Flags for instances, if needed.
5728 				false,																// bottomUsesAOP	- does bottom AS use arrays, or arrays of pointers
5729 				false,																// bottomGeneric	- Bottom created as generic AS type.
5730 				false,																// bottomUnboundedCreation - Create BLAS using buffers with unbounded memory.
5731 				TopTestType::IDENTICAL_INSTANCES,									// topTestType		- If instances are identical then bottom geometries must have different vertices/aabbs
5732 				false,																// topUsesAOP		- does top AS use arrays, or arrays of pointers
5733 				false,																// topGeneric		- Top created as generic AS type.
5734 				false,																// topUnboundedCreation - Create TLAS using buffers with unbounded memory.
5735 				VkBuildAccelerationStructureFlagsKHR(0u),							// buildFlags
5736 				targets[targetIdx].target,											// operationTarget
5737 				OP_NONE,															// operationType
5738 				RTAS_DEFAULT_SIZE,													// width
5739 				RTAS_DEFAULT_SIZE,													// height
5740 				de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()),	// testConfiguration
5741 				0u,																	// workerThreadsCount
5742 				EmptyAccelerationStructureCase::NOT_EMPTY,							// emptyASCase
5743 				InstanceCustomIndexCase::NONE,										// instanceCustomIndexCase
5744 				false,																// useCullMask
5745 				0xFFu,																// cullMask
5746 				UpdateCase::NONE,													// updateCase
5747 			};
5748 			buildTypeGroup->addChild(new RayTracingDeviceASCompabilityKHRTestCase(ctx, targets[targetIdx].name.c_str(), de::SharedPtr<TestParams>(new TestParams(testParams))));
5749 		}
5750 		group->addChild(buildTypeGroup.release());
5751 	}
5752 }
5753 
addUpdateHeaderBottomAddressTests(tcu::TestCaseGroup * group)5754 void addUpdateHeaderBottomAddressTests (tcu::TestCaseGroup* group)
5755 {
5756 	struct
5757 	{
5758 		vk::VkAccelerationStructureBuildTypeKHR		buildType;
5759 		std::string									name;
5760 	}
5761 	const buildTypes[] =
5762 	{
5763 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
5764 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
5765 	};
5766 
5767 	struct
5768 	{
5769 		TopTestType	type;
5770 		std::string	name;
5771 	}
5772 	const instTypes[] =
5773 	{
5774 		{ TopTestType::IDENTICAL_INSTANCES,	"the_same_instances"		},
5775 		{ TopTestType::DIFFERENT_INSTANCES,	"different_instances"		},
5776 		{ TopTestType::MIX_INSTANCES,		"mix_same_diff_instances"	},
5777 	};
5778 
5779 	auto& ctx = group->getTestContext();
5780 
5781 	for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
5782 	{
5783 		de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str()));
5784 
5785 		for (int instTypeIdx = 0; instTypeIdx < DE_LENGTH_OF_ARRAY(instTypes); ++instTypeIdx)
5786 		{
5787 			TestParams testParams
5788 			{
5789 				buildTypes[buildTypeIdx].buildType,									// buildType
5790 				VK_FORMAT_R32G32B32_SFLOAT,											// vertexFormat
5791 				false,																// padVertices
5792 				VK_INDEX_TYPE_NONE_KHR,												// indexType
5793 				BottomTestType::TRIANGLES,											// bottomTestType
5794 				InstanceCullFlags::NONE,											// cullFlags
5795 				false,																// bottomUsesAOP
5796 				false,																// bottomGeneric
5797 				false,																// bottomUnboundedCreation
5798 				instTypes[instTypeIdx].type,										// topTestType
5799 				false,																// topUsesAOP
5800 				false,																// topGeneric
5801 				false,																// topUnboundedCreation
5802 				VkBuildAccelerationStructureFlagsKHR(0u),							// buildFlags
5803 				OT_TOP_ACCELERATION,												// operationTarget
5804 				OP_NONE,															// operationType
5805 				RTAS_DEFAULT_SIZE,													// width
5806 				RTAS_DEFAULT_SIZE,													// height
5807 				de::SharedPtr<TestConfiguration>(DE_NULL),							// testConfiguration
5808 				0u,																	// workerThreadsCount
5809 				EmptyAccelerationStructureCase::NOT_EMPTY,							// emptyASCase
5810 				InstanceCustomIndexCase::NONE,										// instanceCustomIndexCase
5811 				false,																// useCullMask
5812 				0xFFu,																// cullMask
5813 				UpdateCase::NONE,													// updateCase
5814 			};
5815 			buildTypeGroup->addChild(new RayTracingHeaderBottomAddressTestCase(ctx, instTypes[instTypeIdx].name.c_str(), de::SharedPtr<TestParams>(new TestParams(testParams))));
5816 		}
5817 		group->addChild(buildTypeGroup.release());
5818 	}
5819 }
5820 
addQueryPoolResultsTests(TestCaseGroup * group)5821 void addQueryPoolResultsTests (TestCaseGroup* group)
5822 {
5823 	std::pair<VkAccelerationStructureBuildTypeKHR, const char*>
5824 	const buildTypes[]
5825 	{
5826 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu"	},
5827 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu"	},
5828 	};
5829 
5830 	std::pair<bool, const char*>
5831 	const storeTypes[]
5832 	{
5833 		{ false,	"memory"	},
5834 		{ true,		"buffer"	}
5835 	};
5836 
5837 	std::pair<QueryPoolResultsParams::Type, const char*>
5838 	const queryTypes[]
5839 	{
5840 		{ QueryPoolResultsParams::Type::StructureSize,	"structure_size"	},
5841 		{ QueryPoolResultsParams::Type::PointerCount,	"pointer_count"		}
5842 	};
5843 
5844 	std::pair<bool, const char*>
5845 		const buildWithCompacted[]
5846 	{
5847 		{ false,	"no_compacted"		},
5848 		{ true,		"enable_compacted"	}
5849 	};
5850 
5851 	auto& testContext = group->getTestContext();
5852 	for (const auto& buildType : buildTypes)
5853 	{
5854 		auto buildTypeGroup = makeMovePtr<TestCaseGroup>(testContext, buildType.second, "");
5855 		for (const auto& compacted : buildWithCompacted)
5856 		{
5857 			auto buildCompactedGroup = makeMovePtr<TestCaseGroup>(testContext, compacted.second, "");
5858 			for (const auto& storeType : storeTypes)
5859 			{
5860 				auto storeTypeGroup = makeMovePtr<TestCaseGroup>(testContext, storeType.second, "");
5861 				for (const auto& queryType : queryTypes)
5862 				{
5863 					QueryPoolResultsParams	p;
5864 					p.buildType = buildType.first;
5865 					p.inVkBuffer = storeType.first;
5866 					p.queryType = queryType.first;
5867 					p.blasCount = 5;
5868 					p.compacted = compacted.first;
5869 
5870 					storeTypeGroup->addChild(new QueryPoolResultsCase(testContext, queryType.second, makeSharedFrom(p)));
5871 				}
5872 				buildCompactedGroup->addChild(storeTypeGroup.release());
5873 			}
5874 			buildTypeGroup->addChild(buildCompactedGroup.release());
5875 		}
5876 		group->addChild(buildTypeGroup.release());
5877 	}
5878 }
5879 
addCopyWithinPipelineTests(TestCaseGroup * group)5880 void addCopyWithinPipelineTests (TestCaseGroup* group)
5881 {
5882 	std::pair<VkAccelerationStructureBuildTypeKHR, const char*>
5883 	const buildTypes[]
5884 	{
5885 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu"	},
5886 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu"	},
5887 	};
5888 	std::pair<CopyWithinPipelineParams::Type, const char*>
5889 	const testTypes[]
5890 	{
5891 		{ CopyWithinPipelineParams::Type::StageASCopyBit,		"stage_as_copy_bit"  },
5892 		{ CopyWithinPipelineParams::Type::StageAllTransferBit,	"stage_all_transfer" },
5893 		{ CopyWithinPipelineParams::Type::AccessSBTReadBit,		"access_sbt_read"	 }
5894 	};
5895 
5896 	auto& testContext = group->getTestContext();
5897 	for (const auto& buildType : buildTypes)
5898 	{
5899 		auto buildTypeGroup	= makeMovePtr<TestCaseGroup>(testContext, buildType.second, "");
5900 		for (const auto& testType : testTypes)
5901 		{
5902 			CopyWithinPipelineParams	p;
5903 			p.width		= 16;
5904 			p.height	= 16;
5905 			p.build		= buildType.first;
5906 			p.type		= testType.first;
5907 
5908 			buildTypeGroup->addChild(new PipelineStageASCase(testContext, testType.second, makeSharedFrom(p)));
5909 		}
5910 		group->addChild(buildTypeGroup.release());
5911 	}
5912 }
5913 
addUpdateTests(TestCaseGroup * group)5914 void addUpdateTests(TestCaseGroup* group)
5915 {
5916 	const struct
5917 	{
5918 		vk::VkAccelerationStructureBuildTypeKHR				buildType;
5919 		std::string											name;
5920 	} buildTypes[] =
5921 	{
5922 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu"},
5923 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu"},
5924 	};
5925 
5926 	struct
5927 	{
5928 		UpdateCase				updateType;
5929 		const char*				name;
5930 	} updateTypes[] =
5931 	{
5932 		{ UpdateCase::VERTICES,		"vertices"	},
5933 		{ UpdateCase::INDICES,		"indices"	},
5934 		{ UpdateCase::TRANSFORM,	"transform"	},
5935 	};
5936 
5937 	auto& ctx = group->getTestContext();
5938 
5939 	for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
5940 	{
5941 		de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str()));
5942 
5943 		for (int updateTypesIdx = 0; updateTypesIdx < DE_LENGTH_OF_ARRAY(updateTypes); ++updateTypesIdx)
5944 		{
5945 			TestParams testParams
5946 			{
5947 				buildTypes[buildTypeIdx].buildType,
5948 				VK_FORMAT_R32G32B32_SFLOAT,
5949 				false,
5950 				VK_INDEX_TYPE_UINT16,
5951 				BottomTestType::TRIANGLES,
5952 				InstanceCullFlags::NONE,
5953 				false,
5954 				false,
5955 				false,
5956 				TopTestType::IDENTICAL_INSTANCES,
5957 				false,
5958 				false,
5959 				false,
5960 				VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR,
5961 				OT_TOP_ACCELERATION,
5962 				OP_NONE,
5963 				RTAS_DEFAULT_SIZE,
5964 				RTAS_DEFAULT_SIZE,
5965 				de::SharedPtr<TestConfiguration>(new UpdateableASConfiguration()),
5966 				0u,
5967 				EmptyAccelerationStructureCase::NOT_EMPTY,
5968 				InstanceCustomIndexCase::NONE,
5969 				false,
5970 				0xFFu,
5971 				updateTypes[updateTypesIdx].updateType,
5972 			};
5973 			buildTypeGroup->addChild(new ASUpdateCase(ctx, updateTypes[updateTypesIdx].name, testParams));
5974 		}
5975 		group->addChild(buildTypeGroup.release());
5976 	}
5977 }
5978 
5979 
createAccelerationStructuresTests(tcu::TestContext & testCtx)5980 tcu::TestCaseGroup*	createAccelerationStructuresTests(tcu::TestContext& testCtx)
5981 {
5982 	de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "acceleration_structures"));
5983 
5984 	// Test building AS with different build types, build flags and geometries/instances using arrays or arrays of pointers
5985 	addTestGroup(group.get(), "flags", addBasicBuildingTests);
5986 	// Test building AS with different vertex and index formats
5987 	addTestGroup(group.get(), "format", addVertexIndexFormatsTests);
5988 	// Test copying, compaction and serialization of AS
5989 	addTestGroup(group.get(), "operations", addOperationTests);
5990 	// Test host threading operations
5991 	addTestGroup(group.get(), "host_threading", addHostThreadingOperationTests);
5992 	// Test using AS as function argument using both pointers and bare values
5993 	addTestGroup(group.get(), "function_argument", addFuncArgTests);
5994 	// Test building AS with counterclockwise triangles and/or disabling face culling
5995 	addTestGroup(group.get(), "instance_triangle_culling", addInstanceTriangleCullingTests);
5996 	// Test for CullMaskKHR builtin as a part of VK_KHR_ray_tracing_maintenance1
5997 	addTestGroup(group.get(), "ray_cull_mask", addInstanceRayCullMaskTests);
5998 	// Exercise dynamic indexing of acceleration structures
5999 	addTestGroup(group.get(), "dynamic_indexing", addDynamicIndexingTests);
6000 	// Test building empty acceleration structures using different methods
6001 	addTestGroup(group.get(), "empty", addEmptyAccelerationStructureTests);
6002 	// Test using different values for the instance index and checking them in shaders
6003 	addTestGroup(group.get(), "instance_index", addInstanceIndexTests);
6004 	// Test updating instance index using both in-place and separate src/dst acceleration structures
6005 	addTestGroup(group.get(), "instance_update", addInstanceUpdateTests);
6006 	addTestGroup(group.get(), "device_compability_khr", addGetDeviceAccelerationStructureCompabilityTests);
6007 	addTestGroup(group.get(), "header_bottom_address", addUpdateHeaderBottomAddressTests);
6008 	// Test for a new VkQueryPool queries for VK_KHR_ray_tracing_maintenance1
6009 	addTestGroup(group.get(), "query_pool_results", addQueryPoolResultsTests);
6010 	// Tests ACCELLERATION_STRUCTURE_COPY and ACCESS_2_SBT_READ with VK_KHR_ray_tracing_maintenance1
6011 	addTestGroup(group.get(), "copy_within_pipeline", addCopyWithinPipelineTests);
6012 	// Tests updating AS via replacing vertex/index/transform buffers
6013 	addTestGroup(group.get(), "update", addUpdateTests);
6014 
6015 	return group.release();
6016 }
6017 
6018 }	// RayTracing
6019 
6020 }	// vkt
6021