• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2020 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *	  http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Ray Tracing Acceleration Structures tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktRayTracingAccelerationStructuresTests.hpp"
25 
26 #include "vkDefs.hpp"
27 #include "deClock.h"
28 #include "deRandom.h"
29 
30 #include "vktTestCase.hpp"
31 #include "vktTestGroupUtil.hpp"
32 #include "vkCmdUtil.hpp"
33 #include "vkObjUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkBarrierUtil.hpp"
36 #include "vkBufferWithMemory.hpp"
37 #include "vkImageWithMemory.hpp"
38 #include "vkTypeUtil.hpp"
39 #include "vkImageUtil.hpp"
40 #include "vkRayTracingUtil.hpp"
41 #include "tcuVectorUtil.hpp"
42 #include "tcuTexture.hpp"
43 #include "tcuTestLog.hpp"
44 #include "tcuImageCompare.hpp"
45 #include "tcuFloat.hpp"
46 
47 #include <set>
48 
49 namespace vkt
50 {
51 namespace RayTracing
52 {
53 namespace
54 {
55 using namespace vk;
56 using namespace vkt;
57 
58 static const VkFlags	ALL_RAY_TRACING_STAGES	= VK_SHADER_STAGE_RAYGEN_BIT_KHR
59 												| VK_SHADER_STAGE_ANY_HIT_BIT_KHR
60 												| VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR
61 												| VK_SHADER_STAGE_MISS_BIT_KHR
62 												| VK_SHADER_STAGE_INTERSECTION_BIT_KHR
63 												| VK_SHADER_STAGE_CALLABLE_BIT_KHR;
64 
65 
// Kind of geometry stored in the bottom level acceleration structures.
enum BottomTestType
{
	BTT_TRIANGLES	= 0,	// triangle geometry
	BTT_AABBS		= 1,	// axis-aligned bounding boxes
};
71 
// How the instances of the top level acceleration structure relate to the bottom level ones.
enum TopTestType
{
	TTT_IDENTICAL_INSTANCES	= 0,	// every instance uses the same (identity) transform; bottom geometries differ
	TTT_DIFFERENT_INSTANCES	= 1,	// one shared bottom structure, each instance has its own transform
	TTT_MIX_INSTANCES		= 2,
};
78 
// Acceleration structure level an OperationType is applied to.
// NOTE(review): the consumer of this enum is outside this section - confirm against the test instance code.
enum OperationTarget
{
	OT_NONE					= 0,
	OT_TOP_ACCELERATION		= 1,
	OT_BOTTOM_ACCELERATION	= 2,
};
85 
// Extra operation requested by a test case (see TestParams::operationType).
enum OperationType
{
	OP_NONE			= 0,
	OP_COPY			= 1,
	OP_COMPACT		= 2,
	OP_SERIALIZE	= 3,
};
93 
// Test-level selector for the instance flags; getCullFlags() translates it to Vulkan bits.
enum class InstanceCullFlags
{
	NONE				= 0,	// no instance flags
	CULL_DISABLE		= 1,	// triangle facing cull disable bit
	COUNTERCLOCKWISE	= 2,	// triangle front counterclockwise bit
	ALL					= 3,	// both of the bits above
};
101 
// Variants of "empty" acceleration structures exercised by the tests. Values run from 0 to 5.
enum class EmptyAccelerationStructureCase
{
	NOT_EMPTY = 0,			// 0: regular, non-empty structures.
	INACTIVE_TRIANGLES,		// 1: triangles present but inactive.
	INACTIVE_INSTANCES,		// 2: instances present but inactive.
	NO_GEOMETRIES_BOTTOM,	// 3: geometryCount zero when building.
	NO_PRIMITIVES_BOTTOM,	// 4: primitiveCount zero when building.
	NO_PRIMITIVES_TOP,		// 5: primitiveCount zero when building.
};
111 
// Selects whether instances get a custom index and, by its name, the shader stage that reads it.
// With NONE the custom index of every instance is left at zero.
enum class InstanceCustomIndexCase
{
	NONE = 0,		// 0
	CLOSEST_HIT,	// 1
	ANY_HIT,		// 2
	INTERSECTION,	// 3
};
119 
120 static const deUint32 RTAS_DEFAULT_SIZE = 8u;
121 
122 // Chosen to have the most significant bit set to 1 when represented using 24 bits.
123 // This will make sure the instance custom index will not be sign-extended by mistake.
124 constexpr deUint32 INSTANCE_CUSTOM_INDEX_BASE = 0x807f00u;
125 
126 struct TestParams;
127 
128 class TestConfiguration
129 {
130 public:
131 	virtual std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	initBottomAccelerationStructures	(Context&							context,
132 																												 TestParams&						testParams) = 0;
133 	virtual de::MovePtr<TopLevelAccelerationStructure>						initTopAccelerationStructure		(Context&							context,
134 																												 TestParams&						testParams,
135 																												 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >&	bottomLevelAccelerationStructures) = 0;
136 	virtual void															initRayTracingShaders				(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
137 																												 Context&							context,
138 																												TestParams&							testParams) = 0;
139 	virtual void															initShaderBindingTables				(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
140 																												 Context&							context,
141 																												 TestParams&						testParams,
142 																												 VkPipeline							pipeline,
143 																												 deUint32							shaderGroupHandleSize,
144 																												 deUint32							shaderGroupBaseAlignment,
145 																												 de::MovePtr<BufferWithMemory>&		raygenShaderBindingTable,
146 																												 de::MovePtr<BufferWithMemory>&		hitShaderBindingTable,
147 																												 de::MovePtr<BufferWithMemory>&		missShaderBindingTable) = 0;
148 	virtual bool															verifyImage							(BufferWithMemory*					resultBuffer,
149 																												 Context&							context,
150 																												 TestParams&						testParams) = 0;
151 	virtual VkFormat														getResultImageFormat				() = 0;
152 	virtual size_t															getResultImageFormatSize			() = 0;
153 	virtual VkClearValue													getClearValue						() = 0;
154 };
155 
156 struct TestParams
157 {
158 	vk::VkAccelerationStructureBuildTypeKHR	buildType;		// are we making AS on CPU or GPU
159 	VkFormat								vertexFormat;
160 	bool									padVertices;
161 	VkIndexType								indexType;
162 	BottomTestType							bottomTestType; // what kind of geometry is stored in bottom AS
163 	InstanceCullFlags						cullFlags;		// Flags for instances, if needed.
164 	bool									bottomUsesAOP;	// does bottom AS use arrays, or arrays of pointers
165 	bool									bottomGeneric;	// Bottom created as generic AS type.
166 	TopTestType								topTestType;	// If instances are identical then bottom geometries must have different vertices/aabbs
167 	bool									topUsesAOP;		// does top AS use arrays, or arrays of pointers
168 	bool									topGeneric;		// Top created as generic AS type.
169 	VkBuildAccelerationStructureFlagsKHR	buildFlags;
170 	OperationTarget							operationTarget;
171 	OperationType							operationType;
172 	deUint32								width;
173 	deUint32								height;
174 	de::SharedPtr<TestConfiguration>		testConfiguration;
175 	deUint32								workerThreadsCount;
176 	EmptyAccelerationStructureCase			emptyASCase;
177 	InstanceCustomIndexCase					instanceCustomIndexCase;
178 };
179 
getShaderGroupSize(const InstanceInterface & vki,const VkPhysicalDevice physicalDevice)180 deUint32 getShaderGroupSize (const InstanceInterface&	vki,
181 							 const VkPhysicalDevice		physicalDevice)
182 {
183 	de::MovePtr<RayTracingProperties>	rayTracingPropertiesKHR;
184 
185 	rayTracingPropertiesKHR	= makeRayTracingProperties(vki, physicalDevice);
186 	return rayTracingPropertiesKHR->getShaderGroupHandleSize();
187 }
188 
getShaderGroupBaseAlignment(const InstanceInterface & vki,const VkPhysicalDevice physicalDevice)189 deUint32 getShaderGroupBaseAlignment (const InstanceInterface&	vki,
190 									  const VkPhysicalDevice	physicalDevice)
191 {
192 	de::MovePtr<RayTracingProperties>	rayTracingPropertiesKHR;
193 
194 	rayTracingPropertiesKHR = makeRayTracingProperties(vki, physicalDevice);
195 	return rayTracingPropertiesKHR->getShaderGroupBaseAlignment();
196 }
197 
makeImageCreateInfo(deUint32 width,deUint32 height,VkFormat format)198 VkImageCreateInfo makeImageCreateInfo (deUint32 width, deUint32 height, VkFormat format)
199 {
200 	const VkImageCreateInfo			imageCreateInfo			=
201 	{
202 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,																// VkStructureType			sType;
203 		DE_NULL,																							// const void*				pNext;
204 		(VkImageCreateFlags)0u,																				// VkImageCreateFlags		flags;
205 		VK_IMAGE_TYPE_2D,																					// VkImageType				imageType;
206 		format,																								// VkFormat					format;
207 		makeExtent3D(width, height, 1u),																	// VkExtent3D				extent;
208 		1u,																									// deUint32					mipLevels;
209 		1u,																									// deUint32					arrayLayers;
210 		VK_SAMPLE_COUNT_1_BIT,																				// VkSampleCountFlagBits	samples;
211 		VK_IMAGE_TILING_OPTIMAL,																			// VkImageTiling			tiling;
212 		VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT,		// VkImageUsageFlags		usage;
213 		VK_SHARING_MODE_EXCLUSIVE,																			// VkSharingMode			sharingMode;
214 		0u,																									// deUint32					queueFamilyIndexCount;
215 		DE_NULL,																							// const deUint32*			pQueueFamilyIndices;
216 		VK_IMAGE_LAYOUT_UNDEFINED																			// VkImageLayout			initialLayout;
217 	};
218 
219 	return imageCreateInfo;
220 }
221 
makeQueryPool(const DeviceInterface & vk,const VkDevice device,const VkQueryType queryType,deUint32 queryCount)222 Move<VkQueryPool> makeQueryPool(const DeviceInterface&		vk,
223 								const VkDevice				device,
224 								const VkQueryType			queryType,
225 								deUint32					queryCount)
226 {
227 	const VkQueryPoolCreateInfo				queryPoolCreateInfo =
228 	{
229 		VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,		// sType
230 		DE_NULL,										// pNext
231 		(VkQueryPoolCreateFlags)0,						// flags
232 		queryType,										// queryType
233 		queryCount,										// queryCount
234 		0u,												// pipelineStatistics
235 	};
236 	return createQueryPool(vk, device, &queryPoolCreateInfo);
237 }
238 
getCullFlags(InstanceCullFlags flags)239 VkGeometryInstanceFlagsKHR getCullFlags (InstanceCullFlags flags)
240 {
241 	VkGeometryInstanceFlagsKHR cullFlags = 0u;
242 
243 	if (flags == InstanceCullFlags::CULL_DISABLE || flags == InstanceCullFlags::ALL)
244 		cullFlags |= VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR;
245 
246 	if (flags == InstanceCullFlags::COUNTERCLOCKWISE || flags == InstanceCullFlags::ALL)
247 		cullFlags |= VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR;
248 
249 	return cullFlags;
250 }
251 
252 class CheckerboardConfiguration : public TestConfiguration
253 {
254 public:
255 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	initBottomAccelerationStructures	(Context&							context,
256 																										 TestParams&						testParams) override;
257 	de::MovePtr<TopLevelAccelerationStructure>						initTopAccelerationStructure		(Context&							context,
258 																										 TestParams&						testParams,
259 																										 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >&	bottomLevelAccelerationStructures) override;
260 	void															initRayTracingShaders				(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
261 																										 Context&							context,
262 																										 TestParams&						testParams) override;
263 	void															initShaderBindingTables				(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
264 																										 Context&							context,
265 																										 TestParams&						testParams,
266 																										 VkPipeline							pipeline,
267 																										 deUint32							shaderGroupHandleSize,
268 																										 deUint32							shaderGroupBaseAlignment,
269 																										 de::MovePtr<BufferWithMemory>&		raygenShaderBindingTable,
270 																										 de::MovePtr<BufferWithMemory>&		hitShaderBindingTable,
271 																										 de::MovePtr<BufferWithMemory>&		missShaderBindingTable) override;
272 	bool															verifyImage							(BufferWithMemory*					resultBuffer,
273 																										 Context&							context,
274 																										 TestParams&						testParams) override;
275 	VkFormat														getResultImageFormat				() override;
276 	size_t															getResultImageFormatSize			() override;
277 	VkClearValue													getClearValue						() override;
278 };
279 
initBottomAccelerationStructures(Context & context,TestParams & testParams)280 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> > CheckerboardConfiguration::initBottomAccelerationStructures (Context&			context,
281 																														   TestParams&		testParams)
282 {
283 	DE_UNREF(context);
284 
285 	// Cull flags can only be used with triangles.
286 	DE_ASSERT(testParams.cullFlags == InstanceCullFlags::NONE || testParams.bottomTestType == BTT_TRIANGLES);
287 
288 	// Checkerboard configuration does not support empty geometry tests.
289 	DE_ASSERT(testParams.emptyASCase == EmptyAccelerationStructureCase::NOT_EMPTY);
290 
291 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >	result;
292 
293 	const auto instanceFlags = getCullFlags(testParams.cullFlags);
294 
295 	tcu::Vec3 v0(0.0, 1.0, 0.0);
296 	tcu::Vec3 v1(0.0, 0.0, 0.0);
297 	tcu::Vec3 v2(1.0, 1.0, 0.0);
298 	tcu::Vec3 v3(1.0, 0.0, 0.0);
299 
300 	if (testParams.topTestType == TTT_DIFFERENT_INSTANCES)
301 	{
302 		de::MovePtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure = makeBottomLevelAccelerationStructure();
303 		bottomLevelAccelerationStructure->setGeometryCount(1u);
304 		de::SharedPtr<RaytracedGeometryBase> geometry;
305 		if (testParams.bottomTestType == BTT_TRIANGLES)
306 		{
307 			geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_TRIANGLES_KHR, testParams.vertexFormat, testParams.indexType, testParams.padVertices);
308 			if (testParams.indexType == VK_INDEX_TYPE_NONE_KHR)
309 			{
310 				if (instanceFlags == 0u)
311 				{
312 					geometry->addVertex(v0);
313 					geometry->addVertex(v1);
314 					geometry->addVertex(v2);
315 					geometry->addVertex(v2);
316 					geometry->addVertex(v1);
317 					geometry->addVertex(v3);
318 				}
319 				else // Counterclockwise so the flags will be needed for the geometry to be visible.
320 				{
321 					geometry->addVertex(v2);
322 					geometry->addVertex(v1);
323 					geometry->addVertex(v0);
324 					geometry->addVertex(v3);
325 					geometry->addVertex(v1);
326 					geometry->addVertex(v2);
327 				}
328 			}
329 			else // m_data.indexType != VK_INDEX_TYPE_NONE_KHR
330 			{
331 				geometry->addVertex(v0);
332 				geometry->addVertex(v1);
333 				geometry->addVertex(v2);
334 				geometry->addVertex(v3);
335 
336 				if (instanceFlags == 0u)
337 				{
338 					geometry->addIndex(0);
339 					geometry->addIndex(1);
340 					geometry->addIndex(2);
341 					geometry->addIndex(2);
342 					geometry->addIndex(1);
343 					geometry->addIndex(3);
344 				}
345 				else // Counterclockwise so the flags will be needed for the geometry to be visible.
346 				{
347 					geometry->addIndex(2);
348 					geometry->addIndex(1);
349 					geometry->addIndex(0);
350 					geometry->addIndex(3);
351 					geometry->addIndex(1);
352 					geometry->addIndex(2);
353 				}
354 			}
355 		}
356 		else // m_data.bottomTestType == BTT_AABBS
357 		{
358 			geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_AABBS_KHR, testParams.vertexFormat, testParams.indexType, testParams.padVertices);
359 
360 			if (!testParams.padVertices)
361 			{
362 				// Single AABB.
363 				geometry->addVertex(tcu::Vec3(0.0f, 0.0f, -0.1f));
364 				geometry->addVertex(tcu::Vec3(1.0f, 1.0f,  0.1f));
365 			}
366 			else
367 			{
368 				// Multiple AABBs covering the same space.
369 				geometry->addVertex(tcu::Vec3(0.0f, 0.0f, -0.1f));
370 				geometry->addVertex(tcu::Vec3(0.5f, 0.5f,  0.1f));
371 
372 				geometry->addVertex(tcu::Vec3(0.5f, 0.5f, -0.1f));
373 				geometry->addVertex(tcu::Vec3(1.0f, 1.0f,  0.1f));
374 
375 				geometry->addVertex(tcu::Vec3(0.0f, 0.5f, -0.1f));
376 				geometry->addVertex(tcu::Vec3(0.5f, 1.0f,  0.1f));
377 
378 				geometry->addVertex(tcu::Vec3(0.5f, 0.0f, -0.1f));
379 				geometry->addVertex(tcu::Vec3(1.0f, 0.5f,  0.1f));
380 			}
381 		}
382 
383 		bottomLevelAccelerationStructure->addGeometry(geometry);
384 
385 		if (testParams.instanceCustomIndexCase == InstanceCustomIndexCase::ANY_HIT)
386 			geometry->setGeometryFlags(VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR);
387 
388 		result.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(bottomLevelAccelerationStructure.release()));
389 	}
390 	else // m_data.topTestType == TTT_IDENTICAL_INSTANCES
391 	{
392 		// triangle and aabb tests use geometries/aabbs with different vertex positions and the same identity matrix in each instance data
393 		for (deUint32 y = 0; y < testParams.height; ++y)
394 		for (deUint32 x = 0; x < testParams.width; ++x)
395 		{
396 			// let's build a chessboard of geometries
397 			if (((x + y) % 2) == 0)
398 				continue;
399 			tcu::Vec3 xyz((float)x, (float)y, 0.0f);
400 
401 			de::MovePtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure = makeBottomLevelAccelerationStructure();
402 			bottomLevelAccelerationStructure->setGeometryCount(1u);
403 
404 			de::SharedPtr<RaytracedGeometryBase> geometry;
405 			if (testParams.bottomTestType == BTT_TRIANGLES)
406 			{
407 				geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_TRIANGLES_KHR, testParams.vertexFormat, testParams.indexType, testParams.padVertices);
408 				if (testParams.indexType == VK_INDEX_TYPE_NONE_KHR)
409 				{
410 					if (instanceFlags == 0u)
411 					{
412 						geometry->addVertex(xyz + v0);
413 						geometry->addVertex(xyz + v1);
414 						geometry->addVertex(xyz + v2);
415 						geometry->addVertex(xyz + v2);
416 						geometry->addVertex(xyz + v1);
417 						geometry->addVertex(xyz + v3);
418 					}
419 					else // Counterclockwise so the flags will be needed for the geometry to be visible.
420 					{
421 						geometry->addVertex(xyz + v2);
422 						geometry->addVertex(xyz + v1);
423 						geometry->addVertex(xyz + v0);
424 						geometry->addVertex(xyz + v3);
425 						geometry->addVertex(xyz + v1);
426 						geometry->addVertex(xyz + v2);
427 					}
428 				}
429 				else
430 				{
431 					geometry->addVertex(xyz + v0);
432 					geometry->addVertex(xyz + v1);
433 					geometry->addVertex(xyz + v2);
434 					geometry->addVertex(xyz + v3);
435 
436 					if (instanceFlags == 0u)
437 					{
438 						geometry->addIndex(0);
439 						geometry->addIndex(1);
440 						geometry->addIndex(2);
441 						geometry->addIndex(2);
442 						geometry->addIndex(1);
443 						geometry->addIndex(3);
444 					}
445 					else // Counterclockwise so the flags will be needed for the geometry to be visible.
446 					{
447 						geometry->addIndex(2);
448 						geometry->addIndex(1);
449 						geometry->addIndex(0);
450 						geometry->addIndex(3);
451 						geometry->addIndex(1);
452 						geometry->addIndex(2);
453 					}
454 				}
455 			}
456 			else // testParams.bottomTestType == BTT_AABBS
457 			{
458 				geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_AABBS_KHR, testParams.vertexFormat, testParams.indexType, testParams.padVertices);
459 
460 				if (!testParams.padVertices)
461 				{
462 					// Single AABB.
463 					geometry->addVertex(xyz + tcu::Vec3(0.0f, 0.0f, -0.1f));
464 					geometry->addVertex(xyz + tcu::Vec3(1.0f, 1.0f,  0.1f));
465 				}
466 				else
467 				{
468 					// Multiple AABBs covering the same space.
469 					geometry->addVertex(xyz + tcu::Vec3(0.0f, 0.0f, -0.1f));
470 					geometry->addVertex(xyz + tcu::Vec3(0.5f, 0.5f,  0.1f));
471 
472 					geometry->addVertex(xyz + tcu::Vec3(0.5f, 0.5f, -0.1f));
473 					geometry->addVertex(xyz + tcu::Vec3(1.0f, 1.0f,  0.1f));
474 
475 					geometry->addVertex(xyz + tcu::Vec3(0.0f, 0.5f, -0.1f));
476 					geometry->addVertex(xyz + tcu::Vec3(0.5f, 1.0f,  0.1f));
477 
478 					geometry->addVertex(xyz + tcu::Vec3(0.5f, 0.0f, -0.1f));
479 					geometry->addVertex(xyz + tcu::Vec3(1.0f, 0.5f,  0.1f));
480 				}
481 			}
482 
483 			bottomLevelAccelerationStructure->addGeometry(geometry);
484 
485 			if (testParams.instanceCustomIndexCase == InstanceCustomIndexCase::ANY_HIT)
486 				geometry->setGeometryFlags(VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR);
487 
488 			result.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(bottomLevelAccelerationStructure.release()));
489 		}
490 	}
491 
492 	return result;
493 }
494 
initTopAccelerationStructure(Context & context,TestParams & testParams,std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> & bottomLevelAccelerationStructures)495 de::MovePtr<TopLevelAccelerationStructure> CheckerboardConfiguration::initTopAccelerationStructure (Context&		context,
496 																									TestParams&		testParams,
497 																									std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >& bottomLevelAccelerationStructures)
498 {
499 	// Checkerboard configuration does not support empty geometry tests.
500 	DE_ASSERT(testParams.emptyASCase == EmptyAccelerationStructureCase::NOT_EMPTY);
501 
502 	DE_UNREF(context);
503 
504 	const auto instanceCount = testParams.width * testParams.height / 2u;
505 	const auto instanceFlags = getCullFlags(testParams.cullFlags);
506 
507 	de::MovePtr<TopLevelAccelerationStructure>	result = makeTopLevelAccelerationStructure();
508 	result->setInstanceCount(instanceCount);
509 
510 	if (testParams.topTestType == TTT_DIFFERENT_INSTANCES)
511 	{
512 
513 		for (deUint32 y = 0; y < testParams.height; ++y)
514 		for (deUint32 x = 0; x < testParams.width; ++x)
515 		{
516 			if (((x + y) % 2) == 0)
517 				continue;
518 			const VkTransformMatrixKHR			transformMatrixKHR =
519 			{
520 				{								//  float	matrix[3][4];
521 					{ 1.0f, 0.0f, 0.0f, (float)x },
522 					{ 0.0f, 1.0f, 0.0f, (float)y },
523 					{ 0.0f, 0.0f, 1.0f, 0.0f },
524 				}
525 			};
526 			const deUint32 instanceCustomIndex = ((testParams.instanceCustomIndexCase != InstanceCustomIndexCase::NONE) ? (INSTANCE_CUSTOM_INDEX_BASE + x + y) : 0u);
527 			result->addInstance(bottomLevelAccelerationStructures[0], transformMatrixKHR, instanceCustomIndex, 0xFFu, 0u, instanceFlags);
528 		}
529 	}
530 	else // testParams.topTestType == TTT_IDENTICAL_INSTANCES
531 	{
532 		deUint32 currentInstanceIndex = 0;
533 
534 		for (deUint32 y = 0; y < testParams.height; ++y)
535 		for (deUint32 x = 0; x < testParams.width; ++x)
536 		{
537 			if (((x + y) % 2) == 0)
538 				continue;
539 			const deUint32 instanceCustomIndex = ((testParams.instanceCustomIndexCase != InstanceCustomIndexCase::NONE) ? (INSTANCE_CUSTOM_INDEX_BASE + x + y) : 0u);
540 			result->addInstance(bottomLevelAccelerationStructures[currentInstanceIndex++], identityMatrix3x4, instanceCustomIndex, 0xFFu, 0u, instanceFlags);
541 		}
542 	}
543 
544 	return result;
545 }
546 
initRayTracingShaders(de::MovePtr<RayTracingPipeline> & rayTracingPipeline,Context & context,TestParams & testParams)547 void CheckerboardConfiguration::initRayTracingShaders(de::MovePtr<RayTracingPipeline>&		rayTracingPipeline,
548 													  Context&								context,
549 													  TestParams&							testParams)
550 {
551 	DE_UNREF(testParams);
552 	const DeviceInterface&						vkd						= context.getDeviceInterface();
553 	const VkDevice								device					= context.getDevice();
554 
555 	const bool useAnyHit		= (testParams.instanceCustomIndexCase == InstanceCustomIndexCase::ANY_HIT);
556 	const auto hitShaderStage	= (useAnyHit ? VK_SHADER_STAGE_ANY_HIT_BIT_KHR : VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR);
557 	const auto hitShaderName	= (useAnyHit ? "ahit" : "chit");
558 
559 	rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,		createShaderModule(vkd, device, context.getBinaryCollection().get("rgen"),  0), 0);
560 	rayTracingPipeline->addShader(hitShaderStage,						createShaderModule(vkd, device, context.getBinaryCollection().get(hitShaderName),  0), 1);
561 	rayTracingPipeline->addShader(hitShaderStage,						createShaderModule(vkd, device, context.getBinaryCollection().get(hitShaderName),  0), 2);
562 	if (testParams.bottomTestType == BTT_AABBS)
563 		rayTracingPipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR,	createShaderModule(vkd, device, context.getBinaryCollection().get("isect"), 0), 2);
564 	rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR,			createShaderModule(vkd, device, context.getBinaryCollection().get("miss"),  0), 3);
565 }
566 
initShaderBindingTables(de::MovePtr<RayTracingPipeline> & rayTracingPipeline,Context & context,TestParams & testParams,VkPipeline pipeline,deUint32 shaderGroupHandleSize,deUint32 shaderGroupBaseAlignment,de::MovePtr<BufferWithMemory> & raygenShaderBindingTable,de::MovePtr<BufferWithMemory> & hitShaderBindingTable,de::MovePtr<BufferWithMemory> & missShaderBindingTable)567 void CheckerboardConfiguration::initShaderBindingTables(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
568 														Context&							context,
569 														TestParams&							testParams,
570 														VkPipeline							pipeline,
571 														deUint32							shaderGroupHandleSize,
572 														deUint32							shaderGroupBaseAlignment,
573 														de::MovePtr<BufferWithMemory>&		raygenShaderBindingTable,
574 														de::MovePtr<BufferWithMemory>&		hitShaderBindingTable,
575 														de::MovePtr<BufferWithMemory>&		missShaderBindingTable)
576 {
577 	const DeviceInterface&						vkd						= context.getDeviceInterface();
578 	const VkDevice								device					= context.getDevice();
579 	Allocator&									allocator				= context.getDefaultAllocator();
580 
581 	raygenShaderBindingTable											= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1 );
582 	if(testParams.bottomTestType == BTT_AABBS)
583 		hitShaderBindingTable											= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 2, 1 );
584 	else // testParams.bottomTestType == BTT_TRIANGLES
585 		hitShaderBindingTable											= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1 );
586 	missShaderBindingTable												= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 3, 1 );
587 }
588 
verifyImage(BufferWithMemory * resultBuffer,Context & context,TestParams & testParams)589 bool CheckerboardConfiguration::verifyImage(BufferWithMemory* resultBuffer, Context& context, TestParams& testParams)
590 {
591 	// Checkerboard configuration does not support empty geometry tests.
592 	DE_ASSERT(testParams.emptyASCase == EmptyAccelerationStructureCase::NOT_EMPTY);
593 
594 	DE_UNREF(context);
595 	const auto*						bufferPtr	= (deInt32*)resultBuffer->getAllocation().getHostPtr();
596 	deUint32						pos			= 0;
597 	deUint32						failures	= 0;
598 
599 	// verify results - each test case should generate checkerboard pattern
600 	for (deUint32 y = 0; y < testParams.height; ++y)
601 	for (deUint32 x = 0; x < testParams.width; ++x)
602 	{
603 		// The hit value should match the shader code.
604 		const deInt32 hitValue			= ((testParams.instanceCustomIndexCase != InstanceCustomIndexCase::NONE) ? static_cast<deInt32>(INSTANCE_CUSTOM_INDEX_BASE + x + y) : 2);
605 		const deInt32 expectedResult	= ((x + y) % 2) ? hitValue : 1;
606 
607 		if (bufferPtr[pos] != expectedResult)
608 			failures++;
609 
610 		++pos;
611 	}
612 	return failures == 0;
613 }
614 
getResultImageFormat()615 VkFormat CheckerboardConfiguration::getResultImageFormat()
616 {
617 	return VK_FORMAT_R32_SINT;
618 }
619 
getResultImageFormatSize()620 size_t CheckerboardConfiguration::getResultImageFormatSize()
621 {
622 	return sizeof(deUint32);
623 }
624 
getClearValue()625 VkClearValue CheckerboardConfiguration::getClearValue()
626 {
627 	return makeClearValueColorU32(0xFF, 0u, 0u, 0u);
628 }
629 
630 class SingleTriangleConfiguration : public TestConfiguration
631 {
632 public:
633 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	initBottomAccelerationStructures	(Context&							context,
634 																										 TestParams&						testParams) override;
635 	de::MovePtr<TopLevelAccelerationStructure>						initTopAccelerationStructure		(Context&							context,
636 																										 TestParams&						testParams,
637 																										 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >&	bottomLevelAccelerationStructures) override;
638 	void															initRayTracingShaders				(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
639 																										 Context&							context,
640 																										 TestParams&						testParams) override;
641 	void															initShaderBindingTables				(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
642 																										 Context&							context,
643 																										 TestParams&						testParams,
644 																										 VkPipeline							pipeline,
645 																										 deUint32							shaderGroupHandleSize,
646 																										 deUint32							shaderGroupBaseAlignment,
647 																										 de::MovePtr<BufferWithMemory>&		raygenShaderBindingTable,
648 																										 de::MovePtr<BufferWithMemory>&		hitShaderBindingTable,
649 																										 de::MovePtr<BufferWithMemory>&		missShaderBindingTable) override;
650 	bool															verifyImage							(BufferWithMemory*					resultBuffer,
651 																										 Context&							context,
652 																										 TestParams&						testParams) override;
653 	VkFormat														getResultImageFormat				() override;
654 	size_t															getResultImageFormatSize			() override;
655 	VkClearValue													getClearValue						() override;
656 
657 	// well, actually we have 2 triangles, but we ignore the first one ( see raygen shader for this configuration )
658 	const std::vector<tcu::Vec3> vertices =
659 	{
660 		tcu::Vec3(0.0f, 0.0f, -0.1f),
661 		tcu::Vec3(-0.1f, 0.0f, 0.0f),
662 		tcu::Vec3(0.0f, -0.1f, 0.0f),
663 		tcu::Vec3(0.0f, 0.0f, 0.0f),
664 		tcu::Vec3(0.5f, 0.0f, -0.5f),
665 		tcu::Vec3(0.0f, 0.5f, -0.5f),
666 	};
667 
668 	const std::vector<deUint32> indices =
669 	{
670 		3,
671 		4,
672 		5
673 	};
674 };
675 
initBottomAccelerationStructures(Context & context,TestParams & testParams)676 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> > SingleTriangleConfiguration::initBottomAccelerationStructures (Context&			context,
677 																															 TestParams&		testParams)
678 {
679 	DE_UNREF(context);
680 
681 	// No other cases supported for the single triangle configuration.
682 	DE_ASSERT(testParams.instanceCustomIndexCase == InstanceCustomIndexCase::NONE);
683 
684 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >	result;
685 
686 	de::MovePtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure = makeBottomLevelAccelerationStructure();
687 	bottomLevelAccelerationStructure->setGeometryCount(1u);
688 
689 	de::SharedPtr<RaytracedGeometryBase> geometry;
690 	geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_TRIANGLES_KHR, testParams.vertexFormat, testParams.indexType);
691 
692 	auto customVertices(vertices);
693 
694 	if (testParams.emptyASCase == EmptyAccelerationStructureCase::INACTIVE_TRIANGLES)
695 	{
696 		const auto nanValue = tcu::Float32::nan().asFloat();
697 		for (auto& vtx : customVertices)
698 			vtx.x() = nanValue;
699 	}
700 
701 	for (auto it = begin(customVertices), eit = end(customVertices); it != eit; ++it)
702 		geometry->addVertex(*it);
703 
704 	if (testParams.indexType != VK_INDEX_TYPE_NONE_KHR)
705 	{
706 		for (auto it = begin(indices), eit = end(indices); it != eit; ++it)
707 			geometry->addIndex(*it);
708 	}
709 	bottomLevelAccelerationStructure->addGeometry(geometry);
710 	result.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(bottomLevelAccelerationStructure.release()));
711 
712 	return result;
713 }
714 
initTopAccelerationStructure(Context & context,TestParams & testParams,std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> & bottomLevelAccelerationStructures)715 de::MovePtr<TopLevelAccelerationStructure> SingleTriangleConfiguration::initTopAccelerationStructure (Context&			context,
716 																									  TestParams&		testParams,
717 																									  std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >& bottomLevelAccelerationStructures)
718 {
719 	DE_UNREF(context);
720 	DE_UNREF(testParams);
721 
722 	// Unsupported in this configuration.
723 	DE_ASSERT(testParams.instanceCustomIndexCase == InstanceCustomIndexCase::NONE);
724 
725 	de::MovePtr<TopLevelAccelerationStructure>	result = makeTopLevelAccelerationStructure();
726 	result->setInstanceCount(1u);
727 
728 	result->addInstance(bottomLevelAccelerationStructures[0]);
729 
730 	return result;
731 }
732 
initRayTracingShaders(de::MovePtr<RayTracingPipeline> & rayTracingPipeline,Context & context,TestParams & testParams)733 void SingleTriangleConfiguration::initRayTracingShaders(de::MovePtr<RayTracingPipeline>&		rayTracingPipeline,
734 														Context&								context,
735 														TestParams&								testParams)
736 {
737 	DE_UNREF(testParams);
738 	const DeviceInterface&						vkd						= context.getDeviceInterface();
739 	const VkDevice								device					= context.getDevice();
740 
741 	rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,		createShaderModule(vkd, device, context.getBinaryCollection().get("rgen_depth"),  0), 0);
742 	rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,	createShaderModule(vkd, device, context.getBinaryCollection().get("chit_depth"),  0), 1);
743 	rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR,			createShaderModule(vkd, device, context.getBinaryCollection().get("miss_depth"),  0), 2);
744 }
745 
initShaderBindingTables(de::MovePtr<RayTracingPipeline> & rayTracingPipeline,Context & context,TestParams & testParams,VkPipeline pipeline,deUint32 shaderGroupHandleSize,deUint32 shaderGroupBaseAlignment,de::MovePtr<BufferWithMemory> & raygenShaderBindingTable,de::MovePtr<BufferWithMemory> & hitShaderBindingTable,de::MovePtr<BufferWithMemory> & missShaderBindingTable)746 void SingleTriangleConfiguration::initShaderBindingTables(de::MovePtr<RayTracingPipeline>&	rayTracingPipeline,
747 														  Context&							context,
748 														  TestParams&						testParams,
749 														  VkPipeline						pipeline,
750 														  deUint32							shaderGroupHandleSize,
751 														  deUint32							shaderGroupBaseAlignment,
752 														  de::MovePtr<BufferWithMemory>&	raygenShaderBindingTable,
753 														  de::MovePtr<BufferWithMemory>&	hitShaderBindingTable,
754 														  de::MovePtr<BufferWithMemory>&	missShaderBindingTable)
755 {
756 	DE_UNREF(testParams);
757 	const DeviceInterface&						vkd						= context.getDeviceInterface();
758 	const VkDevice								device					= context.getDevice();
759 	Allocator&									allocator				= context.getDefaultAllocator();
760 
761 	raygenShaderBindingTable											= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1 );
762 	hitShaderBindingTable												= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1 );
763 	missShaderBindingTable												= rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 2, 1 );
764 }
765 
pointInTriangle2D(const tcu::Vec3 & p,const tcu::Vec3 & p0,const tcu::Vec3 & p1,const tcu::Vec3 & p2)766 bool pointInTriangle2D(const tcu::Vec3& p, const tcu::Vec3& p0, const tcu::Vec3& p1, const tcu::Vec3& p2)
767 {
768 	float s = p0.y() * p2.x() - p0.x() * p2.y() + (p2.y() - p0.y()) * p.x() + (p0.x() - p2.x()) * p.y();
769 	float t = p0.x() * p1.y() - p0.y() * p1.x() + (p0.y() - p1.y()) * p.x() + (p1.x() - p0.x()) * p.y();
770 
771 	if ((s < 0) != (t < 0))
772 		return false;
773 
774 	float a = -p1.y() * p2.x() + p0.y() * (p2.x() - p1.x()) + p0.x() * (p1.y() - p2.y()) + p1.x() * p2.y();
775 
776 	return a < 0 ?
777 		(s <= 0 && s + t >= a) :
778 		(s >= 0 && s + t <= a);
779 }
780 
verifyImage(BufferWithMemory * resultBuffer,Context & context,TestParams & testParams)781 bool SingleTriangleConfiguration::verifyImage(BufferWithMemory* resultBuffer, Context& context, TestParams& testParams)
782 {
783 	tcu::TextureFormat			imageFormat		= vk::mapVkFormat(getResultImageFormat());
784 	tcu::TextureFormat			vertexFormat	= vk::mapVkFormat(testParams.vertexFormat);
785 	tcu::ConstPixelBufferAccess	resultAccess	(imageFormat, testParams.width, testParams.height, 1, resultBuffer->getAllocation().getHostPtr());
786 
787 	std::vector<float>			reference		(testParams.width * testParams.height);
788 	tcu::PixelBufferAccess		referenceAccess	(imageFormat, testParams.width, testParams.height, 1, reference.data());
789 
790 	// verify results
791 	tcu::Vec3					v0				= vertices[3];
792 	tcu::Vec3					v1				= vertices[4];
793 	tcu::Vec3					v2				= vertices[5];
794 	const int					numChannels		= tcu::getNumUsedChannels(vertexFormat.order);
795 	if (numChannels < 3)
796 	{
797 		v0.z() = 0.0f;
798 		v1.z() = 0.0f;
799 		v2.z() = 0.0f;
800 	}
801 	tcu::Vec3					abc				= tcu::cross((v2 - v0), (v1 - v0));
802 
803 	for (deUint32 j = 0; j < testParams.height; ++j)
804 	{
805 		float y = 0.1f + 0.2f * float(j) / float(testParams.height - 1);
806 		for (deUint32 i = 0; i < testParams.width; ++i)
807 		{
808 			float	x			= 0.1f + 0.2f * float(i) / float(testParams.width - 1);
809 			float	z			= (abc.x()*x + abc.y()*y) / abc.z();
810 			bool	inTriangle	= pointInTriangle2D(tcu::Vec3(x, y, z), v0, v1, v2);
811 			float	refValue	= ((inTriangle && testParams.emptyASCase == EmptyAccelerationStructureCase::NOT_EMPTY) ? 1.0f+z : 0.0f);
812 			referenceAccess.setPixel(tcu::Vec4(refValue, 0.0f, 0.0f, 1.0f), i, j);
813 		}
814 	}
815 	return tcu::floatThresholdCompare(context.getTestContext().getLog(), "Result comparison", "", referenceAccess, resultAccess, tcu::Vec4(0.01f), tcu::COMPARE_LOG_EVERYTHING);
816 }
817 
getResultImageFormat()818 VkFormat SingleTriangleConfiguration::getResultImageFormat()
819 {
820 	return VK_FORMAT_R32_SFLOAT;
821 }
822 
getResultImageFormatSize()823 size_t SingleTriangleConfiguration::getResultImageFormatSize()
824 {
825 	return sizeof(float);
826 }
827 
getClearValue()828 VkClearValue SingleTriangleConfiguration::getClearValue()
829 {
830 	return makeClearValueColorF32(32.0f, 0.0f, 0.0f, 0.0f);
831 }
832 
commonASTestsCheckSupport(Context & context)833 void commonASTestsCheckSupport(Context& context)
834 {
835 	context.requireInstanceFunctionality("VK_KHR_get_physical_device_properties2");
836 	context.requireDeviceFunctionality("VK_KHR_acceleration_structure");
837 	context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
838 
839 	const VkPhysicalDeviceRayTracingPipelineFeaturesKHR&	rayTracingPipelineFeaturesKHR = context.getRayTracingPipelineFeatures();
840 	if (rayTracingPipelineFeaturesKHR.rayTracingPipeline == DE_FALSE)
841 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceRayTracingPipelineFeaturesKHR.rayTracingPipeline");
842 
843 	const VkPhysicalDeviceAccelerationStructureFeaturesKHR&	accelerationStructureFeaturesKHR = context.getAccelerationStructureFeatures();
844 	if (accelerationStructureFeaturesKHR.accelerationStructure == DE_FALSE)
845 		TCU_THROW(TestError, "VK_KHR_ray_tracing_pipeline requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructure");
846 }
847 
848 class RayTracingASBasicTestCase : public TestCase
849 {
850 public:
851 																	RayTracingASBasicTestCase			(tcu::TestContext& context, const char* name, const char* desc, const TestParams& data);
852 																	~RayTracingASBasicTestCase			(void);
853 
854 	void															checkSupport						(Context& context) const override;
855 	void															initPrograms						(SourceCollections& programCollection) const override;
856 	TestInstance*													createInstance						(Context& context) const override;
857 protected:
858 	TestParams														m_data;
859 };
860 
861 // Same as RayTracingASBasicTestCase but it will only initialize programs for SingleTriangleConfiguration and use hand-tuned SPIR-V
862 // assembly.
863 class RayTracingASFuncArgTestCase : public RayTracingASBasicTestCase
864 {
865 public:
866 																	RayTracingASFuncArgTestCase			(tcu::TestContext& context, const char* name, const char* desc, const TestParams& data);
~RayTracingASFuncArgTestCase(void)867 																	~RayTracingASFuncArgTestCase		(void) {}
868 
869 	void															initPrograms						(SourceCollections& programCollection) const override;
870 };
871 
872 class RayTracingASBasicTestInstance : public TestInstance
873 {
874 public:
875 																	RayTracingASBasicTestInstance		(Context& context, const TestParams& data);
876 																	~RayTracingASBasicTestInstance		(void) = default;
877 	tcu::TestStatus													iterate								(void) override;
878 
879 protected:
880 	bool															iterateNoWorkers					(void);
881 	bool															iterateWithWorkers					(void);
882 	de::MovePtr<BufferWithMemory>									runTest								(const deUint32 workerThreadsCount);
883 private:
884 	TestParams														m_data;
885 };
886 
RayTracingASBasicTestCase(tcu::TestContext & context,const char * name,const char * desc,const TestParams & data)887 RayTracingASBasicTestCase::RayTracingASBasicTestCase (tcu::TestContext& context, const char* name, const char* desc, const TestParams& data)
888 	: vkt::TestCase	(context, name, desc)
889 	, m_data		(data)
890 {
891 }
892 
~RayTracingASBasicTestCase(void)893 RayTracingASBasicTestCase::~RayTracingASBasicTestCase	(void)
894 {
895 }
896 
checkSupport(Context & context) const897 void RayTracingASBasicTestCase::checkSupport(Context& context) const
898 {
899 	commonASTestsCheckSupport(context);
900 
901 	const VkPhysicalDeviceAccelerationStructureFeaturesKHR&	accelerationStructureFeaturesKHR = context.getAccelerationStructureFeatures();
902 	if (m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR && accelerationStructureFeaturesKHR.accelerationStructureHostCommands == DE_FALSE)
903 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructureHostCommands");
904 
905 	// Check supported vertex format.
906 	checkAccelerationStructureVertexBufferFormat(context.getInstanceInterface(), context.getPhysicalDevice(), m_data.vertexFormat);
907 }
908 
initPrograms(SourceCollections & programCollection) const909 void RayTracingASBasicTestCase::initPrograms (SourceCollections& programCollection) const
910 {
911 	bool storeInRGen = false;
912 	bool storeInAHit = false;
913 	bool storeInCHit = false;
914 	bool storeInISec = false;
915 
916 	switch (m_data.instanceCustomIndexCase)
917 	{
918 	case InstanceCustomIndexCase::NONE:			storeInRGen = true;	break;
919 	case InstanceCustomIndexCase::CLOSEST_HIT:	storeInCHit = true; break;
920 	case InstanceCustomIndexCase::ANY_HIT:		storeInAHit = true;	break;
921 	case InstanceCustomIndexCase::INTERSECTION:	storeInISec = true; break;
922 	default: DE_ASSERT(false); break;
923 	}
924 
925 	const std::string				imageDeclaration	= "layout(r32i, set = 0, binding = 0) uniform iimage2D result;\n";
926 	const std::string				storeCustomIndex	= "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), ivec4(gl_InstanceCustomIndexEXT, 0, 0, 1));\n";
927 	const vk::ShaderBuildOptions	buildOptions		(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
928 
929 	{
930 		std::stringstream css;
931 		css
932 			<< "#version 460 core\n"
933 			<< "#extension GL_EXT_ray_tracing : require\n"
934 			<< "layout(location = 0) rayPayloadEXT ivec4 hitValue;\n";
935 
936 		if (storeInRGen)
937 			css << imageDeclaration;
938 
939 		css
940 			<< "layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n"
941 			<< "\n"
942 			<< "void main()\n"
943 			<< "{\n"
944 			<< "  float tmin      = 0.0;\n"
945 			<< "  float tmax      = 1.0;\n"
946 			<< "  vec3  origin    = vec3(float(gl_LaunchIDEXT.x) + 0.5f, float(gl_LaunchIDEXT.y) + 0.5f, 0.5);\n"
947 			<< "  vec3  direction = vec3(0.0,0.0,-1.0);\n"
948 			<< "  hitValue        = ivec4(0,0,0,0);\n"
949 			<< "  traceRayEXT(topLevelAS, " << ((m_data.cullFlags == InstanceCullFlags::NONE) ? "0" : "gl_RayFlagsCullBackFacingTrianglesEXT") << ", 0xFF, 0, 0, 0, origin, tmin, direction, tmax, 0);\n";
950 
951 		if (storeInRGen)
952 			css << "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), hitValue);\n";
953 
954 		css << "}\n";
955 
956 		programCollection.glslSources.add("rgen") << glu::RaygenSource(updateRayTracingGLSL(css.str())) << buildOptions;
957 	}
958 
959 	{
960 		std::stringstream css;
961 		css
962 			<< "#version 460 core\n"
963 			<< "#extension GL_EXT_ray_tracing : require\n"
964 			<< "layout(location = 0) rayPayloadInEXT ivec4 hitValue;\n";
965 
966 		if (storeInCHit)
967 			css << imageDeclaration;
968 
969 		css
970 			<< "void main()\n"
971 			<< "{\n"
972 			<< "  hitValue = ivec4(2,0,0,1);\n";
973 
974 		if (storeInCHit)
975 			css << storeCustomIndex;
976 
977 		css << "}\n";
978 
979 		programCollection.glslSources.add("chit") << glu::ClosestHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
980 	}
981 
982 	if (storeInAHit)
983 	{
984 		std::stringstream css;
985 		css
986 			<< "#version 460 core\n"
987 			<< "#extension GL_EXT_ray_tracing : require\n"
988 			<< imageDeclaration
989 			<< "void main()\n"
990 			<< "{\n"
991 			<< storeCustomIndex
992 			<< "}\n";
993 
994 		programCollection.glslSources.add("ahit") << glu::AnyHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
995 	}
996 
997 	{
998 		std::stringstream css;
999 		css
1000 			<< "#version 460 core\n"
1001 			<< "#extension GL_EXT_ray_tracing : require\n"
1002 			<< "hitAttributeEXT ivec4 hitAttribute;\n";
1003 
1004 		if (storeInISec)
1005 			css << imageDeclaration;
1006 
1007 		css
1008 			<< "void main()\n"
1009 			<< "{\n"
1010 			<< "  hitAttribute = ivec4(0,0,0,0);\n"
1011 			<< "  reportIntersectionEXT(0.5f, 0);\n";
1012 
1013 		if (storeInISec)
1014 			css << storeCustomIndex;
1015 
1016 		css << "}\n";
1017 
1018 		programCollection.glslSources.add("isect") << glu::IntersectionSource(updateRayTracingGLSL(css.str())) << buildOptions;
1019 	}
1020 
1021 	{
1022 		std::stringstream css;
1023 		css
1024 			<< "#version 460 core\n"
1025 			<< "#extension GL_EXT_ray_tracing : require\n"
1026 			<< "layout(location = 0) rayPayloadInEXT ivec4 hitValue;\n";
1027 
1028 		if (!storeInRGen)
1029 			css << imageDeclaration;
1030 
1031 		css
1032 			<< "void main()\n"
1033 			<< "{\n"
1034 			<< "  hitValue = ivec4(1,0,0,1);\n";
1035 
1036 		if (!storeInRGen)
1037 			css << "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), hitValue);\n";
1038 
1039 		css << "}\n";
1040 
1041 		programCollection.glslSources.add("miss") << glu::MissSource(updateRayTracingGLSL(css.str())) << buildOptions;
1042 	}
1043 
1044 	{
1045 		std::stringstream css;
1046 		css <<
1047 			"#version 460 core\n"
1048 			"#extension GL_EXT_ray_tracing : require\n"
1049 			"layout(location = 0) rayPayloadEXT vec4 hitValue;\n"
1050 			"layout(r32f, set = 0, binding = 0) uniform image2D result;\n"
1051 			"layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n"
1052 			"\n"
1053 			"vec3 calculateOrigin(vec3 zeroOrigin, vec3 xAxis, vec3 yAxis)\n"
1054 			"{\n"
1055 			"  return zeroOrigin + (float(gl_LaunchIDEXT.x)/float(gl_LaunchSizeEXT.x-1)) * xAxis + (float(gl_LaunchIDEXT.y)/float(gl_LaunchSizeEXT.y-1)) * yAxis;\n"
1056 			"}\n"
1057 			"\n"
1058 			"void main()\n"
1059 			"{\n"
1060 			"  float tmin      = 0.0;\n"
1061 			"  float tmax      = 2.0;\n"
1062 			"  vec3  origin    = calculateOrigin( vec3(0.1,0.1,1.0), vec3(0.2,0.0,0.0), vec3(0.0,0.2,0.0) );\n"
1063 			"  vec3  direction = vec3(0.0,0.0,-1.0);\n"
1064 			"  hitValue        = vec4(0.0,0.0,0.0,0.0);\n"
1065 			"  traceRayEXT(topLevelAS, 0, 0xFF, 0, 0, 0, origin, tmin, direction, tmax, 0);\n"
1066 			"  imageStore(result, ivec2(gl_LaunchIDEXT.xy), hitValue);\n"
1067 			"}\n";
1068 		programCollection.glslSources.add("rgen_depth") << glu::RaygenSource(updateRayTracingGLSL(css.str())) << buildOptions;
1069 	}
1070 
1071 	{
1072 		std::stringstream css;
1073 		css <<
1074 			"#version 460 core\n"
1075 			"#extension GL_EXT_ray_tracing : require\n"
1076 			"layout(location = 0) rayPayloadInEXT vec4 hitValue;\n"
1077 			"void main()\n"
1078 			"{\n"
1079 			"  hitValue = vec4(gl_RayTmaxEXT,0.0,0.0,1.0);\n"
1080 			"}\n";
1081 
1082 		programCollection.glslSources.add("chit_depth") << glu::ClosestHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
1083 	}
1084 
1085 	{
1086 		std::stringstream css;
1087 		css <<
1088 			"#version 460 core\n"
1089 			"#extension GL_EXT_ray_tracing : require\n"
1090 			"layout(location = 0) rayPayloadInEXT vec4 hitValue;\n"
1091 			"void main()\n"
1092 			"{\n"
1093 			"  hitValue = vec4(0.0,0.0,0.0,1.0);\n"
1094 			"}\n";
1095 
1096 		programCollection.glslSources.add("miss_depth") << glu::MissSource(updateRayTracingGLSL(css.str())) << buildOptions;
1097 	}
1098 }
1099 
createInstance(Context & context) const1100 TestInstance* RayTracingASBasicTestCase::createInstance (Context& context) const
1101 {
1102 	return new RayTracingASBasicTestInstance(context, m_data);
1103 }
1104 
RayTracingASFuncArgTestCase(tcu::TestContext & context,const char * name,const char * desc,const TestParams & data)1105 RayTracingASFuncArgTestCase::RayTracingASFuncArgTestCase (tcu::TestContext& context, const char* name, const char* desc, const TestParams& data)
1106 	: RayTracingASBasicTestCase (context, name, desc, data)
1107 {
1108 }
1109 
initPrograms(SourceCollections & programCollection) const1110 void RayTracingASFuncArgTestCase::initPrograms (SourceCollections& programCollection) const
1111 {
1112 	const vk::ShaderBuildOptions	buildOptions	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
1113 	const vk::SpirVAsmBuildOptions	spvBuildOptions	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, true);
1114 
1115 	{
1116 		// The SPIR-V assembly below is based on the following GLSL code. Some
1117 		// modifications have been made to make traceRaysBottomWrapper take a bare
1118 		// acceleration structure as its argument instead of a pointer to it, so we can
1119 		// test passing a pointer and a bare value in the same test.
1120 		//
1121 		//	#version 460 core
1122 		//	#extension GL_EXT_ray_tracing : require
1123 		//	layout(location = 0) rayPayloadEXT vec4 hitValue;
1124 		//	layout(r32f, set = 0, binding = 0) uniform image2D result;
1125 		//	layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;
1126 		//
1127 		//	void traceRaysBottomWrapper(
1128 		//	  accelerationStructureEXT topLevel,
1129 		//	  uint rayFlags,
1130 		//	  uint cullMask,
1131 		//	  uint sbtRecordOffset,
1132 		//	  uint sbtRecordStride,
1133 		//	  uint missIndex,
1134 		//	  vec3 origin,
1135 		//	  float Tmin,
1136 		//	  vec3 direction,
1137 		//	  float Tmax)
1138 		//	{
1139 		//	  traceRayEXT(topLevel, rayFlags, cullMask, sbtRecordOffset, sbtRecordStride, missIndex, origin, Tmin, direction, Tmax, 0);
1140 		//	}
1141 		//
1142 		//	void traceRaysTopWrapper(
1143 		//	  accelerationStructureEXT topLevel,
1144 		//	  uint rayFlags,
1145 		//	  uint cullMask,
1146 		//	  uint sbtRecordOffset,
1147 		//	  uint sbtRecordStride,
1148 		//	  uint missIndex,
1149 		//	  vec3 origin,
1150 		//	  float Tmin,
1151 		//	  vec3 direction,
1152 		//	  float Tmax)
1153 		//	{
1154 		//	  traceRaysBottomWrapper(topLevel, rayFlags, cullMask, sbtRecordOffset, sbtRecordStride, missIndex, origin, Tmin, direction, Tmax);
1155 		//	}
1156 		//
1157 		//	vec3 calculateOrigin(vec3 zeroOrigin, vec3 xAxis, vec3 yAxis)
1158 		//	{
1159 		//	  return zeroOrigin + (float(gl_LaunchIDEXT.x)/float(gl_LaunchSizeEXT.x-1)) * xAxis + (float(gl_LaunchIDEXT.y)/float(gl_LaunchSizeEXT.y-1)) * yAxis;
1160 		//	}
1161 		//
1162 		//	void main()
1163 		//	{
1164 		//	  float tmin      = 0.0;
1165 		//	  float tmax      = 2.0;
1166 		//	  vec3  origin    = calculateOrigin( vec3(0.1,0.1,1.0), vec3(0.2,0.0,0.0), vec3(0.0,0.2,0.0) );
1167 		//	  vec3  direction = vec3(0.0,0.0,-1.0);
1168 		//	  hitValue        = vec4(0.0,0.0,0.0,0.0);
1169 		//	  traceRaysTopWrapper(topLevelAS, 0, 0xFF, 0, 0, 0, origin, tmin, direction, tmax);
1170 		//	  imageStore(result, ivec2(gl_LaunchIDEXT.xy), hitValue);
1171 		//	}
1172 
1173 		std::ostringstream rgen;
1174 		rgen
1175 			<< "; SPIR-V\n"
1176 			<< "; Version: 1.4\n"
1177 			<< "; Generator: Khronos Glslang Reference Front End; 10\n"
1178 			<< "; Bound: 156\n"
1179 			<< "; Schema: 0\n"
1180 			<< "OpCapability RayTracingKHR\n"
1181 			<< "OpExtension \"SPV_KHR_ray_tracing\"\n"
1182 			<< "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1183 			<< "OpMemoryModel Logical GLSL450\n"
1184 			<< "OpEntryPoint RayGenerationKHR %4 \"main\" %59 %82 %88 %130 %148\n"
1185 			<< "OpDecorate %59 Location 0\n"
1186 			<< "OpDecorate %82 BuiltIn LaunchIdKHR\n"
1187 			<< "OpDecorate %88 BuiltIn LaunchSizeKHR\n"
1188 			<< "OpDecorate %130 DescriptorSet 0\n"
1189 			<< "OpDecorate %130 Binding 1\n"
1190 			<< "OpDecorate %148 DescriptorSet 0\n"
1191 			<< "OpDecorate %148 Binding 0\n"
1192 			<< "%2 = OpTypeVoid\n"
1193 			<< "%3 = OpTypeFunction %2\n"
1194 
1195 			// This is the bare type.
1196 			<< "%6 = OpTypeAccelerationStructureKHR\n"
1197 
1198 			// This is the pointer type.
1199 			<< "%7 = OpTypePointer UniformConstant %6\n"
1200 
1201 			<< "%8 = OpTypeInt 32 0\n"
1202 			<< "%9 = OpTypePointer Function %8\n"
1203 			<< "%10 = OpTypeFloat 32\n"
1204 			<< "%11 = OpTypeVector %10 3\n"
1205 			<< "%12 = OpTypePointer Function %11\n"
1206 			<< "%13 = OpTypePointer Function %10\n"
1207 
1208 			// This is the type for traceRaysTopWrapper and also the original traceRaysBottomWrapper.
1209 			<< "%14 = OpTypeFunction %2 %7 %9 %9 %9 %9 %9 %12 %13 %12 %13\n"
1210 
1211 			// This is the modified type to take a bare AS as the first argument, for the modified version of traceRaysBottomWrapper.
1212 			<< "%14b = OpTypeFunction %2 %6 %9 %9 %9 %9 %9 %12 %13 %12 %13\n"
1213 
1214 			<< "%39 = OpTypeFunction %11 %12 %12 %12\n"
1215 			<< "%55 = OpTypeInt 32 1\n"
1216 			<< "%56 = OpConstant %55 0\n"
1217 			<< "%57 = OpTypeVector %10 4\n"
1218 			<< "%58 = OpTypePointer RayPayloadKHR %57\n"
1219 			<< "%59 = OpVariable %58 RayPayloadKHR\n"
1220 			<< "%80 = OpTypeVector %8 3\n"
1221 			<< "%81 = OpTypePointer Input %80\n"
1222 			<< "%82 = OpVariable %81 Input\n"
1223 			<< "%83 = OpConstant %8 0\n"
1224 			<< "%84 = OpTypePointer Input %8\n"
1225 			<< "%88 = OpVariable %81 Input\n"
1226 			<< "%91 = OpConstant %8 1\n"
1227 			<< "%112 = OpConstant %10 0\n"
1228 			<< "%114 = OpConstant %10 2\n"
1229 			<< "%116 = OpConstant %10 0.100000001\n"
1230 			<< "%117 = OpConstant %10 1\n"
1231 			<< "%118 = OpConstantComposite %11 %116 %116 %117\n"
1232 			<< "%119 = OpConstant %10 0.200000003\n"
1233 			<< "%120 = OpConstantComposite %11 %119 %112 %112\n"
1234 			<< "%121 = OpConstantComposite %11 %112 %119 %112\n"
1235 			<< "%127 = OpConstant %10 -1\n"
1236 			<< "%128 = OpConstantComposite %11 %112 %112 %127\n"
1237 			<< "%129 = OpConstantComposite %57 %112 %112 %112 %112\n"
1238 			<< "%130 = OpVariable %7 UniformConstant\n"
1239 			<< "%131 = OpConstant %8 255\n"
1240 			<< "%146 = OpTypeImage %10 2D 0 0 0 2 R32f\n"
1241 			<< "%147 = OpTypePointer UniformConstant %146\n"
1242 			<< "%148 = OpVariable %147 UniformConstant\n"
1243 			<< "%150 = OpTypeVector %8 2\n"
1244 			<< "%153 = OpTypeVector %55 2\n"
1245 
1246 			// This is main().
1247 			<< "%4 = OpFunction %2 None %3\n"
1248 			<< "%5 = OpLabel\n"
1249 			<< "%111 = OpVariable %13 Function\n"
1250 			<< "%113 = OpVariable %13 Function\n"
1251 			<< "%115 = OpVariable %12 Function\n"
1252 			<< "%122 = OpVariable %12 Function\n"
1253 			<< "%123 = OpVariable %12 Function\n"
1254 			<< "%124 = OpVariable %12 Function\n"
1255 			<< "%126 = OpVariable %12 Function\n"
1256 			<< "%132 = OpVariable %9 Function\n"
1257 			<< "%133 = OpVariable %9 Function\n"
1258 			<< "%134 = OpVariable %9 Function\n"
1259 			<< "%135 = OpVariable %9 Function\n"
1260 			<< "%136 = OpVariable %9 Function\n"
1261 			<< "%137 = OpVariable %12 Function\n"
1262 			<< "%139 = OpVariable %13 Function\n"
1263 			<< "%141 = OpVariable %12 Function\n"
1264 			<< "%143 = OpVariable %13 Function\n"
1265 			<< "OpStore %111 %112\n"
1266 			<< "OpStore %113 %114\n"
1267 			<< "OpStore %122 %118\n"
1268 			<< "OpStore %123 %120\n"
1269 			<< "OpStore %124 %121\n"
1270 			<< "%125 = OpFunctionCall %11 %43 %122 %123 %124\n"
1271 			<< "OpStore %115 %125\n"
1272 			<< "OpStore %126 %128\n"
1273 			<< "OpStore %59 %129\n"
1274 			<< "OpStore %132 %83\n"
1275 			<< "OpStore %133 %131\n"
1276 			<< "OpStore %134 %83\n"
1277 			<< "OpStore %135 %83\n"
1278 			<< "OpStore %136 %83\n"
1279 			<< "%138 = OpLoad %11 %115\n"
1280 			<< "OpStore %137 %138\n"
1281 			<< "%140 = OpLoad %10 %111\n"
1282 			<< "OpStore %139 %140\n"
1283 			<< "%142 = OpLoad %11 %126\n"
1284 			<< "OpStore %141 %142\n"
1285 			<< "%144 = OpLoad %10 %113\n"
1286 			<< "OpStore %143 %144\n"
1287 			<< "%145 = OpFunctionCall %2 %37 %130 %132 %133 %134 %135 %136 %137 %139 %141 %143\n"
1288 			<< "%149 = OpLoad %146 %148\n"
1289 			<< "%151 = OpLoad %80 %82\n"
1290 			<< "%152 = OpVectorShuffle %150 %151 %151 0 1\n"
1291 			<< "%154 = OpBitcast %153 %152\n"
1292 			<< "%155 = OpLoad %57 %59\n"
1293 			<< "OpImageWrite %149 %154 %155\n"
1294 			<< "OpReturn\n"
1295 			<< "OpFunctionEnd\n"
1296 
1297 			// This is traceRaysBottomWrapper, doing the OpTraceRayKHR call.
1298 			// We have modified the type so it takes a bare AS as the first argument.
1299 			// %25 = OpFunction %2 None %14
1300 			<< "%25 = OpFunction %2 None %14b\n"
1301 
1302 			// Also the type of the first argument here.
1303 			// %15 = OpFunctionParameter %7
1304 			<< "%15 = OpFunctionParameter %6\n"
1305 
1306 			<< "%16 = OpFunctionParameter %9\n"
1307 			<< "%17 = OpFunctionParameter %9\n"
1308 			<< "%18 = OpFunctionParameter %9\n"
1309 			<< "%19 = OpFunctionParameter %9\n"
1310 			<< "%20 = OpFunctionParameter %9\n"
1311 			<< "%21 = OpFunctionParameter %12\n"
1312 			<< "%22 = OpFunctionParameter %13\n"
1313 			<< "%23 = OpFunctionParameter %12\n"
1314 			<< "%24 = OpFunctionParameter %13\n"
1315 			<< "%26 = OpLabel\n"
1316 
1317 			// We no longer need to dereference the pointer here.
1318 			// %45 = OpLoad %6 %15
1319 
1320 			<< "%46 = OpLoad %8 %16\n"
1321 			<< "%47 = OpLoad %8 %17\n"
1322 			<< "%48 = OpLoad %8 %18\n"
1323 			<< "%49 = OpLoad %8 %19\n"
1324 			<< "%50 = OpLoad %8 %20\n"
1325 			<< "%51 = OpLoad %11 %21\n"
1326 			<< "%52 = OpLoad %10 %22\n"
1327 			<< "%53 = OpLoad %11 %23\n"
1328 			<< "%54 = OpLoad %10 %24\n"
1329 
1330 			// And we can use the first argument here directly.
1331 			// OpTraceRayKHR %45 %46 %47 %48 %49 %50 %51 %52 %53 %54 %59
1332 			<< "OpTraceRayKHR %15 %46 %47 %48 %49 %50 %51 %52 %53 %54 %59\n"
1333 
1334 			<< "OpReturn\n"
1335 			<< "OpFunctionEnd\n"
1336 
1337 			// This is traceRaysTopWrapper, which calls traceRaysBottomWrapper.
1338 			<< "%37 = OpFunction %2 None %14\n"
1339 
1340 			// First argument, pointer to AS.
1341 			<< "%27 = OpFunctionParameter %7\n"
1342 
1343 			<< "%28 = OpFunctionParameter %9\n"
1344 			<< "%29 = OpFunctionParameter %9\n"
1345 			<< "%30 = OpFunctionParameter %9\n"
1346 			<< "%31 = OpFunctionParameter %9\n"
1347 			<< "%32 = OpFunctionParameter %9\n"
1348 			<< "%33 = OpFunctionParameter %12\n"
1349 			<< "%34 = OpFunctionParameter %13\n"
1350 			<< "%35 = OpFunctionParameter %12\n"
1351 			<< "%36 = OpFunctionParameter %13\n"
1352 			<< "%38 = OpLabel\n"
1353 			<< "%60 = OpVariable %9 Function\n"
1354 			<< "%62 = OpVariable %9 Function\n"
1355 			<< "%64 = OpVariable %9 Function\n"
1356 			<< "%66 = OpVariable %9 Function\n"
1357 			<< "%68 = OpVariable %9 Function\n"
1358 			<< "%70 = OpVariable %12 Function\n"
1359 			<< "%72 = OpVariable %13 Function\n"
1360 			<< "%74 = OpVariable %12 Function\n"
1361 			<< "%76 = OpVariable %13 Function\n"
1362 
1363 			// Dereference the pointer to pass the AS as the first argument.
1364 			<< "%27b = OpLoad %6 %27\n"
1365 
1366 			<< "%61 = OpLoad %8 %28\n"
1367 			<< "OpStore %60 %61\n"
1368 			<< "%63 = OpLoad %8 %29\n"
1369 			<< "OpStore %62 %63\n"
1370 			<< "%65 = OpLoad %8 %30\n"
1371 			<< "OpStore %64 %65\n"
1372 			<< "%67 = OpLoad %8 %31\n"
1373 			<< "OpStore %66 %67\n"
1374 			<< "%69 = OpLoad %8 %32\n"
1375 			<< "OpStore %68 %69\n"
1376 			<< "%71 = OpLoad %11 %33\n"
1377 			<< "OpStore %70 %71\n"
1378 			<< "%73 = OpLoad %10 %34\n"
1379 			<< "OpStore %72 %73\n"
1380 			<< "%75 = OpLoad %11 %35\n"
1381 			<< "OpStore %74 %75\n"
1382 			<< "%77 = OpLoad %10 %36\n"
1383 			<< "OpStore %76 %77\n"
1384 
1385 			// %2 is void, %25 is traceRaysBottomWrapper and %27 was the first argument.
1386 			// We need to pass the loaded AS instead.
1387 			// %78 = OpFunctionCall %2 %25 %27 %60 %62 %64 %66 %68 %70 %72 %74 %76
1388 			<< "%78 = OpFunctionCall %2 %25 %27b %60 %62 %64 %66 %68 %70 %72 %74 %76\n"
1389 
1390 			<< "OpReturn\n"
1391 			<< "OpFunctionEnd\n"
1392 
1393 			// This is calculateOrigin().
1394 			<< "%43 = OpFunction %11 None %39\n"
1395 			<< "%40 = OpFunctionParameter %12\n"
1396 			<< "%41 = OpFunctionParameter %12\n"
1397 			<< "%42 = OpFunctionParameter %12\n"
1398 			<< "%44 = OpLabel\n"
1399 			<< "%79 = OpLoad %11 %40\n"
1400 			<< "%85 = OpAccessChain %84 %82 %83\n"
1401 			<< "%86 = OpLoad %8 %85\n"
1402 			<< "%87 = OpConvertUToF %10 %86\n"
1403 			<< "%89 = OpAccessChain %84 %88 %83\n"
1404 			<< "%90 = OpLoad %8 %89\n"
1405 			<< "%92 = OpISub %8 %90 %91\n"
1406 			<< "%93 = OpConvertUToF %10 %92\n"
1407 			<< "%94 = OpFDiv %10 %87 %93\n"
1408 			<< "%95 = OpLoad %11 %41\n"
1409 			<< "%96 = OpVectorTimesScalar %11 %95 %94\n"
1410 			<< "%97 = OpFAdd %11 %79 %96\n"
1411 			<< "%98 = OpAccessChain %84 %82 %91\n"
1412 			<< "%99 = OpLoad %8 %98\n"
1413 			<< "%100 = OpConvertUToF %10 %99\n"
1414 			<< "%101 = OpAccessChain %84 %88 %91\n"
1415 			<< "%102 = OpLoad %8 %101\n"
1416 			<< "%103 = OpISub %8 %102 %91\n"
1417 			<< "%104 = OpConvertUToF %10 %103\n"
1418 			<< "%105 = OpFDiv %10 %100 %104\n"
1419 			<< "%106 = OpLoad %11 %42\n"
1420 			<< "%107 = OpVectorTimesScalar %11 %106 %105\n"
1421 			<< "%108 = OpFAdd %11 %97 %107\n"
1422 			<< "OpReturnValue %108\n"
1423 			<< "OpFunctionEnd\n"
1424 			;
1425 
1426 		programCollection.spirvAsmSources.add("rgen_depth") << spvBuildOptions << rgen.str();
1427 	}
1428 
1429 	// chit_depth and miss_depth below have been left untouched.
1430 
1431 	{
1432 		std::stringstream css;
1433 		css <<
1434 			"#version 460 core\n"
1435 			"#extension GL_EXT_ray_tracing : require\n"
1436 			"layout(location = 0) rayPayloadInEXT vec4 hitValue;\n"
1437 			"void main()\n"
1438 			"{\n"
1439 			"  hitValue = vec4(gl_RayTmaxEXT,0.0,0.0,1.0);\n"
1440 			"}\n";
1441 
1442 		programCollection.glslSources.add("chit_depth") << glu::ClosestHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
1443 	}
1444 
1445 	{
1446 		std::stringstream css;
1447 		css <<
1448 			"#version 460 core\n"
1449 			"#extension GL_EXT_ray_tracing : require\n"
1450 			"layout(location = 0) rayPayloadInEXT vec4 hitValue;\n"
1451 			"void main()\n"
1452 			"{\n"
1453 			"  hitValue = vec4(0.0,0.0,0.0,1.0);\n"
1454 			"}\n";
1455 
1456 		programCollection.glslSources.add("miss_depth") << glu::MissSource(updateRayTracingGLSL(css.str())) << buildOptions;
1457 	}
1458 }
1459 
RayTracingASBasicTestInstance(Context & context,const TestParams & data)1460 RayTracingASBasicTestInstance::RayTracingASBasicTestInstance (Context& context, const TestParams& data)
1461 	: vkt::TestInstance		(context)
1462 	, m_data				(data)
1463 {
1464 }
1465 
runTest(const deUint32 workerThreadsCount)1466 de::MovePtr<BufferWithMemory> RayTracingASBasicTestInstance::runTest(const deUint32 workerThreadsCount)
1467 {
1468 	const InstanceInterface&			vki									= m_context.getInstanceInterface();
1469 	const DeviceInterface&				vkd									= m_context.getDeviceInterface();
1470 	const VkDevice						device								= m_context.getDevice();
1471 	const VkPhysicalDevice				physicalDevice						= m_context.getPhysicalDevice();
1472 	const deUint32						queueFamilyIndex					= m_context.getUniversalQueueFamilyIndex();
1473 	const VkQueue						queue								= m_context.getUniversalQueue();
1474 	Allocator&							allocator							= m_context.getDefaultAllocator();
1475 	const deUint32						pixelCount							= m_data.width * m_data.height;
1476 	const deUint32						shaderGroupHandleSize				= getShaderGroupSize(vki, physicalDevice);
1477 	const deUint32						shaderGroupBaseAlignment			= getShaderGroupBaseAlignment(vki, physicalDevice);
1478 	const bool							htCopy								= (workerThreadsCount != 0) && (m_data.operationType == OP_COPY);
1479 	const bool							htSerialize							= (workerThreadsCount != 0) && (m_data.operationType == OP_SERIALIZE);
1480 
1481 	const Move<VkDescriptorSetLayout>	descriptorSetLayout					= DescriptorSetLayoutBuilder()
1482 																					.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
1483 																					.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
1484 																					.build(vkd, device);
1485 	const Move<VkDescriptorPool>		descriptorPool						= DescriptorPoolBuilder()
1486 																					.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
1487 																					.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
1488 																					.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1489 	const Move<VkDescriptorSet>			descriptorSet						= makeDescriptorSet(vkd, device, *descriptorPool, *descriptorSetLayout);
1490 	const Move<VkPipelineLayout>		pipelineLayout						= makePipelineLayout(vkd, device, descriptorSetLayout.get());
1491 
1492 	de::MovePtr<RayTracingPipeline>		rayTracingPipeline					= de::newMovePtr<RayTracingPipeline>();
1493 	m_data.testConfiguration->initRayTracingShaders(rayTracingPipeline, m_context, m_data);
1494 	Move<VkPipeline>					pipeline							= rayTracingPipeline->createPipeline(vkd, device, *pipelineLayout);
1495 
1496 	de::MovePtr<BufferWithMemory>		raygenShaderBindingTable;
1497 	de::MovePtr<BufferWithMemory>		hitShaderBindingTable;
1498 	de::MovePtr<BufferWithMemory>		missShaderBindingTable;
1499 	m_data.testConfiguration->initShaderBindingTables(rayTracingPipeline, m_context, m_data, *pipeline, shaderGroupHandleSize, shaderGroupBaseAlignment, raygenShaderBindingTable, hitShaderBindingTable, missShaderBindingTable);
1500 
1501 	const VkStridedDeviceAddressRegionKHR	raygenShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, raygenShaderBindingTable->get(),	0),	shaderGroupHandleSize,	shaderGroupHandleSize);
1502 	const VkStridedDeviceAddressRegionKHR	missShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(),		0),	shaderGroupHandleSize,	shaderGroupHandleSize);
1503 	const VkStridedDeviceAddressRegionKHR	hitShaderBindingTableRegion			= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitShaderBindingTable->get(),		0),	shaderGroupHandleSize,	shaderGroupHandleSize);
1504 	const VkStridedDeviceAddressRegionKHR	callableShaderBindingTableRegion	= makeStridedDeviceAddressRegionKHR(DE_NULL,																	0,						0);
1505 
1506 	const VkFormat						imageFormat							= m_data.testConfiguration->getResultImageFormat();
1507 	const VkImageCreateInfo				imageCreateInfo						= makeImageCreateInfo(m_data.width, m_data.height, imageFormat);
1508 	const VkImageSubresourceRange		imageSubresourceRange				= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
1509 	const de::MovePtr<ImageWithMemory>	image								= de::MovePtr<ImageWithMemory>(new ImageWithMemory(vkd, device, allocator, imageCreateInfo, MemoryRequirement::Any));
1510 	const Move<VkImageView>				imageView							= makeImageView(vkd, device, **image, VK_IMAGE_VIEW_TYPE_2D, imageFormat, imageSubresourceRange);
1511 
1512 	const VkBufferCreateInfo			resultBufferCreateInfo				= makeBufferCreateInfo(pixelCount*m_data.testConfiguration->getResultImageFormatSize(), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
1513 	const VkImageSubresourceLayers		resultBufferImageSubresourceLayers	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
1514 	const VkBufferImageCopy				resultBufferImageRegion				= makeBufferImageCopy(makeExtent3D(m_data.width, m_data.height, 1u), resultBufferImageSubresourceLayers);
1515 	de::MovePtr<BufferWithMemory>		resultBuffer						= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible));
1516 
1517 	const VkDescriptorImageInfo			descriptorImageInfo					= makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
1518 
1519 	const Move<VkCommandPool>			cmdPool								= createCommandPool(vkd, device, 0, queueFamilyIndex);
1520 	const Move<VkCommandBuffer>			cmdBuffer							= allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1521 
1522 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	bottomLevelAccelerationStructures;
1523 	de::MovePtr<TopLevelAccelerationStructure>						topLevelAccelerationStructure;
1524 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	bottomLevelAccelerationStructureCopies;
1525 	de::MovePtr<TopLevelAccelerationStructure>						topLevelAccelerationStructureCopy;
1526 	std::vector<de::SharedPtr<SerialStorage>>						bottomSerialized;
1527 	std::vector<de::SharedPtr<SerialStorage>>						topSerialized;
1528 	std::vector<VkDeviceSize>			accelerationCompactedSizes;
1529 	std::vector<VkDeviceSize>			accelerationSerialSizes;
1530 	Move<VkQueryPool>					m_queryPoolCompact;
1531 	Move<VkQueryPool>					m_queryPoolSerial;
1532 
1533 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
1534 	{
1535 		const VkImageMemoryBarrier				preImageBarrier = makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT,
1536 			VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1537 			**image, imageSubresourceRange);
1538 		cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &preImageBarrier);
1539 		const VkClearValue						clearValue = m_data.testConfiguration->getClearValue();
1540 		vkd.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1, &imageSubresourceRange);
1541 		const VkImageMemoryBarrier				postImageBarrier = makeImageMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
1542 			VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL,
1543 			**image, imageSubresourceRange);
1544 		cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, &postImageBarrier);
1545 
1546 		// build bottom level acceleration structures and their copies ( only when we are testing copying bottom level acceleration structures )
1547 		bool									bottomCompact		= m_data.operationType == OP_COMPACT && m_data.operationTarget == OT_BOTTOM_ACCELERATION;
1548 		bool									bottomSerial		= m_data.operationType == OP_SERIALIZE && m_data.operationTarget == OT_BOTTOM_ACCELERATION;
1549 		const bool								buildWithoutGeom	= (m_data.emptyASCase == EmptyAccelerationStructureCase::NO_GEOMETRIES_BOTTOM);
1550 		const bool								bottomNoPrimitives	= (m_data.emptyASCase == EmptyAccelerationStructureCase::NO_PRIMITIVES_BOTTOM);
1551 		const bool								topNoPrimitives		= (m_data.emptyASCase == EmptyAccelerationStructureCase::NO_PRIMITIVES_TOP);
1552 		const bool								inactiveInstances	= (m_data.emptyASCase == EmptyAccelerationStructureCase::INACTIVE_INSTANCES);
1553 		bottomLevelAccelerationStructures							= m_data.testConfiguration->initBottomAccelerationStructures(m_context, m_data);
1554 		VkBuildAccelerationStructureFlagsKHR	allowCompactionFlag	= VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR;
1555 		VkBuildAccelerationStructureFlagsKHR	emptyCompactionFlag	= VkBuildAccelerationStructureFlagsKHR(0);
1556 		VkBuildAccelerationStructureFlagsKHR	bottomCompactFlags	= (bottomCompact ? allowCompactionFlag : emptyCompactionFlag);
1557 		VkBuildAccelerationStructureFlagsKHR	bottomBuildFlags	= m_data.buildFlags | bottomCompactFlags;
1558 		std::vector<VkAccelerationStructureKHR>	accelerationStructureHandles;
1559 		std::vector<VkDeviceSize>				bottomBlasCompactSize;
1560 		std::vector<VkDeviceSize>				bottomBlasSerialSize;
1561 
1562 		for (auto& blas : bottomLevelAccelerationStructures)
1563 		{
1564 			blas->setBuildType						(m_data.buildType);
1565 			blas->setBuildFlags						(bottomBuildFlags);
1566 			blas->setUseArrayOfPointers				(m_data.bottomUsesAOP);
1567 			blas->setCreateGeneric					(m_data.bottomGeneric);
1568 			blas->setBuildWithoutGeometries			(buildWithoutGeom);
1569 			blas->setBuildWithoutPrimitives			(bottomNoPrimitives);
1570 			blas->createAndBuild					(vkd, device, *cmdBuffer, allocator);
1571 			accelerationStructureHandles.push_back	(*(blas->getPtr()));
1572 		}
1573 
1574 		if (m_data.operationType == OP_COMPACT)
1575 		{
1576 			deUint32 queryCount	= (m_data.operationTarget == OT_BOTTOM_ACCELERATION) ? deUint32(bottomLevelAccelerationStructures.size()) : 1u;
1577 			if (m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1578 				m_queryPoolCompact = makeQueryPool(vkd, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, queryCount);
1579 			if (m_data.operationTarget == OT_BOTTOM_ACCELERATION)
1580 				queryAccelerationStructureSize(vkd, device, *cmdBuffer, accelerationStructureHandles, m_data.buildType, m_queryPoolCompact.get(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, 0u, bottomBlasCompactSize);
1581 		}
1582 		if (m_data.operationType == OP_SERIALIZE)
1583 		{
1584 			deUint32 queryCount	= (m_data.operationTarget == OT_BOTTOM_ACCELERATION) ? deUint32(bottomLevelAccelerationStructures.size()) : 1u;
1585 			if (m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1586 				m_queryPoolSerial = makeQueryPool(vkd, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, queryCount);
1587 			if (m_data.operationTarget == OT_BOTTOM_ACCELERATION)
1588 				queryAccelerationStructureSize(vkd, device, *cmdBuffer, accelerationStructureHandles, m_data.buildType, m_queryPoolSerial.get(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, bottomBlasSerialSize);
1589 		}
1590 
1591 		// if AS is built on GPU and we are planning to make a compact copy of it or serialize / deserialize it - we have to have download query results to CPU
1592 		if ((m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) && (bottomCompact || bottomSerial))
1593 		{
1594 			endCommandBuffer(vkd, *cmdBuffer);
1595 
1596 			submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
1597 
1598 			if (bottomCompact)
1599 				VK_CHECK(vkd.getQueryPoolResults(device, *m_queryPoolCompact, 0u, deUint32(bottomBlasCompactSize.size()), sizeof(VkDeviceSize) * bottomBlasCompactSize.size(), bottomBlasCompactSize.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
1600 			if (bottomSerial)
1601 				VK_CHECK(vkd.getQueryPoolResults(device, *m_queryPoolSerial, 0u, deUint32(bottomBlasSerialSize.size()), sizeof(VkDeviceSize) * bottomBlasSerialSize.size(), bottomBlasSerialSize.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
1602 
1603 			vkd.resetCommandPool(device, *cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
1604 			beginCommandBuffer(vkd, *cmdBuffer, 0u);
1605 		}
1606 
1607 		auto bottomLevelAccelerationStructuresPtr								= &bottomLevelAccelerationStructures;
1608 		if (m_data.operationType != OP_NONE && m_data.operationTarget == OT_BOTTOM_ACCELERATION)
1609 		{
1610 			switch (m_data.operationType)
1611 			{
1612 			case OP_COPY:
1613 			{
1614 				for (size_t i = 0; i < bottomLevelAccelerationStructures.size(); ++i)
1615 				{
1616 					de::MovePtr<BottomLevelAccelerationStructure> asCopy = makeBottomLevelAccelerationStructure();
1617 					asCopy->setDeferredOperation(htCopy, workerThreadsCount);
1618 					asCopy->setBuildType(m_data.buildType);
1619 					asCopy->setBuildFlags(m_data.buildFlags);
1620 					asCopy->setUseArrayOfPointers(m_data.bottomUsesAOP);
1621 					asCopy->setCreateGeneric(m_data.bottomGeneric);
1622 					asCopy->setBuildWithoutGeometries(buildWithoutGeom);
1623 					asCopy->setBuildWithoutPrimitives(bottomNoPrimitives);
1624 					asCopy->createAndCopyFrom(vkd, device, *cmdBuffer, allocator, bottomLevelAccelerationStructures[i].get(), 0u, 0u);
1625 					bottomLevelAccelerationStructureCopies.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(asCopy.release()));
1626 				}
1627 				break;
1628 			}
1629 			case OP_COMPACT:
1630 			{
1631 				for (size_t i = 0; i < bottomLevelAccelerationStructures.size(); ++i)
1632 				{
1633 					de::MovePtr<BottomLevelAccelerationStructure> asCopy = makeBottomLevelAccelerationStructure();
1634 					asCopy->setBuildType(m_data.buildType);
1635 					asCopy->setBuildFlags(m_data.buildFlags);
1636 					asCopy->setUseArrayOfPointers(m_data.bottomUsesAOP);
1637 					asCopy->setCreateGeneric(m_data.bottomGeneric);
1638 					asCopy->setBuildWithoutGeometries(buildWithoutGeom);
1639 					asCopy->setBuildWithoutPrimitives(bottomNoPrimitives);
1640 					asCopy->createAndCopyFrom(vkd, device, *cmdBuffer, allocator, bottomLevelAccelerationStructures[i].get(), bottomBlasCompactSize[i], 0u);
1641 					bottomLevelAccelerationStructureCopies.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(asCopy.release()));
1642 				}
1643 				break;
1644 			}
1645 			case OP_SERIALIZE:
1646 			{
1647 				//bottomLevelAccelerationStructureCopies = m_data.testConfiguration->initBottomAccelerationStructures(m_context, m_data);
1648 				for (size_t i = 0; i < bottomLevelAccelerationStructures.size(); ++i)
1649 				{
1650 					de::SharedPtr<SerialStorage> storage ( new SerialStorage(vkd, device, allocator, m_data.buildType, bottomBlasSerialSize[i]));
1651 
1652 					bottomLevelAccelerationStructures[i]->setDeferredOperation(htSerialize, workerThreadsCount);
1653 					bottomLevelAccelerationStructures[i]->serialize(vkd, device, *cmdBuffer, storage.get());
1654 					bottomSerialized.push_back(storage);
1655 
1656 					if (m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1657 					{
1658 						endCommandBuffer(vkd, *cmdBuffer);
1659 
1660 						submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
1661 
1662 						vkd.resetCommandPool(device, *cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
1663 						beginCommandBuffer(vkd, *cmdBuffer, 0u);
1664 					}
1665 
1666 					de::MovePtr<BottomLevelAccelerationStructure> asCopy = makeBottomLevelAccelerationStructure();
1667 					asCopy->setBuildType(m_data.buildType);
1668 					asCopy->setBuildFlags(m_data.buildFlags);
1669 					asCopy->setUseArrayOfPointers(m_data.bottomUsesAOP);
1670 					asCopy->setCreateGeneric(m_data.bottomGeneric);
1671 					asCopy->setBuildWithoutGeometries(buildWithoutGeom);
1672 					asCopy->setBuildWithoutPrimitives(bottomNoPrimitives);
1673 					asCopy->setDeferredOperation(htSerialize, workerThreadsCount);
1674 					asCopy->createAndDeserializeFrom(vkd, device, *cmdBuffer, allocator, storage.get(), 0u);
1675 					bottomLevelAccelerationStructureCopies.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(asCopy.release()));
1676 				}
1677 				break;
1678 			}
1679 			default:
1680 				DE_ASSERT(DE_FALSE);
1681 			}
1682 			bottomLevelAccelerationStructuresPtr = &bottomLevelAccelerationStructureCopies;
1683 		}
1684 
1685 		// build top level acceleration structures and their copies ( only when we are testing copying top level acceleration structures )
1686 		bool									topCompact			= m_data.operationType == OP_COMPACT && m_data.operationTarget == OT_TOP_ACCELERATION;
1687 		bool									topSerial			= m_data.operationType == OP_SERIALIZE && m_data.operationTarget == OT_TOP_ACCELERATION;
1688 		VkBuildAccelerationStructureFlagsKHR	topCompactFlags		= (topCompact ? allowCompactionFlag : emptyCompactionFlag);
1689 		VkBuildAccelerationStructureFlagsKHR	topBuildFlags		= m_data.buildFlags | topCompactFlags;
1690 		std::vector<VkAccelerationStructureKHR> topLevelStructureHandles;
1691 		std::vector<VkDeviceSize>				topBlasCompactSize;
1692 		std::vector<VkDeviceSize>				topBlasSerialSize;
1693 
1694 		topLevelAccelerationStructure								= m_data.testConfiguration->initTopAccelerationStructure(m_context, m_data, *bottomLevelAccelerationStructuresPtr);
1695 		topLevelAccelerationStructure->setBuildType					(m_data.buildType);
1696 		topLevelAccelerationStructure->setBuildFlags				(topBuildFlags);
1697 		topLevelAccelerationStructure->setBuildWithoutPrimitives	(topNoPrimitives);
1698 		topLevelAccelerationStructure->setUseArrayOfPointers		(m_data.topUsesAOP);
1699 		topLevelAccelerationStructure->setCreateGeneric				(m_data.topGeneric);
1700 		topLevelAccelerationStructure->setInactiveInstances			(inactiveInstances);
1701 		topLevelAccelerationStructure->createAndBuild				(vkd, device, *cmdBuffer, allocator);
1702 		topLevelStructureHandles.push_back							(*(topLevelAccelerationStructure->getPtr()));
1703 
1704 		if (topCompact)
1705 			queryAccelerationStructureSize(vkd, device, *cmdBuffer, topLevelStructureHandles, m_data.buildType, m_queryPoolCompact.get(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, 0u, topBlasCompactSize);
1706 		if (topSerial)
1707 			queryAccelerationStructureSize(vkd, device, *cmdBuffer, topLevelStructureHandles, m_data.buildType, m_queryPoolSerial.get(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, topBlasSerialSize);
1708 
1709 		// if AS is built on GPU and we are planning to make a compact copy of it or serialize / deserialize it - we have to have download query results to CPU
1710 		if ((m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) && (topCompact || topSerial))
1711 		{
1712 			endCommandBuffer(vkd, *cmdBuffer);
1713 
1714 			submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
1715 
1716 			if (topCompact)
1717 				VK_CHECK(vkd.getQueryPoolResults(device, *m_queryPoolCompact, 0u, deUint32(topBlasCompactSize.size()), sizeof(VkDeviceSize) * topBlasCompactSize.size(), topBlasCompactSize.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
1718 			if (topSerial)
1719 				VK_CHECK(vkd.getQueryPoolResults(device, *m_queryPoolSerial, 0u, deUint32(topBlasSerialSize.size()), sizeof(VkDeviceSize) * topBlasSerialSize.size(), topBlasSerialSize.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
1720 
1721 			vkd.resetCommandPool(device, *cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
1722 			beginCommandBuffer(vkd, *cmdBuffer, 0u);
1723 		}
1724 
1725 		const TopLevelAccelerationStructure*			topLevelRayTracedPtr	= topLevelAccelerationStructure.get();
1726 		if (m_data.operationType != OP_NONE && m_data.operationTarget == OT_TOP_ACCELERATION)
1727 		{
1728 			switch (m_data.operationType)
1729 			{
1730 				case OP_COPY:
1731 				{
1732 					topLevelAccelerationStructureCopy = makeTopLevelAccelerationStructure();
1733 					topLevelAccelerationStructureCopy->setDeferredOperation(htCopy, workerThreadsCount);
1734 					topLevelAccelerationStructureCopy->setBuildType(m_data.buildType);
1735 					topLevelAccelerationStructureCopy->setBuildFlags(m_data.buildFlags);
1736 					topLevelAccelerationStructureCopy->setBuildWithoutPrimitives(topNoPrimitives);
1737 					topLevelAccelerationStructureCopy->setInactiveInstances(inactiveInstances);
1738 					topLevelAccelerationStructureCopy->setUseArrayOfPointers(m_data.topUsesAOP);
1739 					topLevelAccelerationStructureCopy->setCreateGeneric(m_data.topGeneric);
1740 					topLevelAccelerationStructureCopy->createAndCopyFrom(vkd, device, *cmdBuffer, allocator, topLevelAccelerationStructure.get(), 0u, 0u);
1741 					break;
1742 				}
1743 				case OP_COMPACT:
1744 				{
1745 					topLevelAccelerationStructureCopy = makeTopLevelAccelerationStructure();
1746 					topLevelAccelerationStructureCopy->setBuildType(m_data.buildType);
1747 					topLevelAccelerationStructureCopy->setBuildFlags(m_data.buildFlags);
1748 					topLevelAccelerationStructureCopy->setBuildWithoutPrimitives(topNoPrimitives);
1749 					topLevelAccelerationStructureCopy->setInactiveInstances(inactiveInstances);
1750 					topLevelAccelerationStructureCopy->setUseArrayOfPointers(m_data.topUsesAOP);
1751 					topLevelAccelerationStructureCopy->setCreateGeneric(m_data.topGeneric);
1752 					topLevelAccelerationStructureCopy->createAndCopyFrom(vkd, device, *cmdBuffer, allocator, topLevelAccelerationStructure.get(), topBlasCompactSize[0], 0u);
1753 					break;
1754 				}
1755 				case OP_SERIALIZE:
1756 				{
1757 					de::SharedPtr<SerialStorage> storage = de::SharedPtr<SerialStorage>(new SerialStorage(vkd, device, allocator, m_data.buildType, topBlasSerialSize[0]));
1758 
1759 					topLevelAccelerationStructure->setDeferredOperation(htSerialize, workerThreadsCount);
1760 					topLevelAccelerationStructure->serialize(vkd, device, *cmdBuffer, storage.get());
1761 					topSerialized.push_back(storage);
1762 
1763 					if (m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1764 					{
1765 						endCommandBuffer(vkd, *cmdBuffer);
1766 
1767 						submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
1768 
1769 						vkd.resetCommandPool(device, *cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
1770 						beginCommandBuffer(vkd, *cmdBuffer, 0u);
1771 					}
1772 
1773 					topLevelAccelerationStructureCopy = makeTopLevelAccelerationStructure();
1774 					topLevelAccelerationStructureCopy->setBuildType(m_data.buildType);
1775 					topLevelAccelerationStructureCopy->setBuildFlags(m_data.buildFlags);
1776 					topLevelAccelerationStructureCopy->setBuildWithoutPrimitives(topNoPrimitives);
1777 					topLevelAccelerationStructureCopy->setInactiveInstances(inactiveInstances);
1778 					topLevelAccelerationStructureCopy->setUseArrayOfPointers(m_data.topUsesAOP);
1779 					topLevelAccelerationStructureCopy->setCreateGeneric(m_data.topGeneric);
1780 					topLevelAccelerationStructureCopy->setDeferredOperation(htSerialize, workerThreadsCount);
1781 					topLevelAccelerationStructureCopy->createAndDeserializeFrom(vkd, device, *cmdBuffer, allocator, storage.get(), 0u);
1782 					break;
1783 				}
1784 				default:
1785 					DE_ASSERT(DE_FALSE);
1786 			}
1787 			topLevelRayTracedPtr = topLevelAccelerationStructureCopy.get();
1788 		}
1789 
1790 		VkWriteDescriptorSetAccelerationStructureKHR	accelerationStructureWriteDescriptorSet	=
1791 		{
1792 			VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	//  VkStructureType						sType;
1793 			DE_NULL,															//  const void*							pNext;
1794 			1u,																	//  deUint32							accelerationStructureCount;
1795 			topLevelRayTracedPtr->getPtr(),										//  const VkAccelerationStructureKHR*	pAccelerationStructures;
1796 		};
1797 
1798 		DescriptorSetUpdateBuilder()
1799 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
1800 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
1801 			.update(vkd, device);
1802 
1803 		vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL);
1804 
1805 		vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
1806 
1807 		cmdTraceRays(vkd,
1808 			*cmdBuffer,
1809 			&raygenShaderBindingTableRegion,
1810 			&missShaderBindingTableRegion,
1811 			&hitShaderBindingTableRegion,
1812 			&callableShaderBindingTableRegion,
1813 			m_data.width, m_data.height, 1);
1814 
1815 		const VkMemoryBarrier				postTraceMemoryBarrier	= makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
1816 		const VkMemoryBarrier				postCopyMemoryBarrier	= makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
1817 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_TRANSFER_BIT, &postTraceMemoryBarrier);
1818 
1819 		vkd.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **resultBuffer, 1u, &resultBufferImageRegion);
1820 
1821 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &postCopyMemoryBarrier);
1822 	}
1823 	endCommandBuffer(vkd, *cmdBuffer);
1824 
1825 	submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
1826 
1827 	invalidateMappedMemoryRange(vkd, device, resultBuffer->getAllocation().getMemory(), resultBuffer->getAllocation().getOffset(), pixelCount * sizeof(deUint32));
1828 
1829 	return resultBuffer;
1830 }
1831 
iterateNoWorkers(void)1832 bool RayTracingASBasicTestInstance::iterateNoWorkers (void)
1833 {
1834 	// run test using arrays of pointers
1835 	const de::MovePtr<BufferWithMemory>	buffer		= runTest(0);
1836 
1837 	return m_data.testConfiguration->verifyImage(buffer.get(), m_context, m_data);
1838 }
1839 
iterateWithWorkers(void)1840 bool RayTracingASBasicTestInstance::iterateWithWorkers (void)
1841 {
1842 	const deUint64					singleThreadTimeStart	= deGetMicroseconds();
1843 	de::MovePtr<BufferWithMemory>	singleThreadBufferCPU	= runTest(0);
1844 	const bool						singleThreadValidation	= m_data.testConfiguration->verifyImage(singleThreadBufferCPU.get(), m_context, m_data);
1845 	const deUint64					singleThreadTime		= deGetMicroseconds() - singleThreadTimeStart;
1846 
1847 	deUint64						multiThreadTimeStart	= deGetMicroseconds();
1848 	de::MovePtr<BufferWithMemory>	multiThreadBufferCPU	= runTest(m_data.workerThreadsCount);
1849 	const bool						multiThreadValidation	= m_data.testConfiguration->verifyImage(multiThreadBufferCPU.get(), m_context, m_data);
1850 	deUint64						multiThreadTime			= deGetMicroseconds() - multiThreadTimeStart;
1851 	const deUint64					multiThreadTimeOut		= 10 * singleThreadTime;
1852 
1853 	const deUint32					result					= singleThreadValidation && multiThreadValidation;
1854 
1855 	if (multiThreadTime > multiThreadTimeOut)
1856 	{
1857 		std::string failMsg	= "Time of multithreaded test execution " + de::toString(multiThreadTime) +
1858 							  " that is longer than expected execution time " + de::toString(multiThreadTimeOut);
1859 
1860 		TCU_FAIL(failMsg);
1861 	}
1862 
1863 	return result;
1864 }
1865 
iterate(void)1866 tcu::TestStatus RayTracingASBasicTestInstance::iterate (void)
1867 {
1868 	bool result;
1869 
1870 	if (m_data.workerThreadsCount != 0)
1871 		result = iterateWithWorkers();
1872 	else
1873 		result = iterateNoWorkers();
1874 
1875 	if (result)
1876 		return tcu::TestStatus::pass("Pass");
1877 	else
1878 		return tcu::TestStatus::fail("Fail");
1879 }
1880 
1881 // Tests dynamic indexing of acceleration structures
1882 class RayTracingASDynamicIndexingTestCase : public TestCase
1883 {
1884 public:
1885 						RayTracingASDynamicIndexingTestCase			(tcu::TestContext& context, const char* name);
1886 						~RayTracingASDynamicIndexingTestCase		(void) = default;
1887 
1888 	void				checkSupport								(Context& context) const override;
1889 	void				initPrograms								(SourceCollections& programCollection) const override;
1890 	TestInstance*		createInstance								(Context& context) const override;
1891 };
1892 
1893 class RayTracingASDynamicIndexingTestInstance : public TestInstance
1894 {
1895 public:
1896 						RayTracingASDynamicIndexingTestInstance		(Context& context);
1897 						~RayTracingASDynamicIndexingTestInstance	(void) = default;
1898 	tcu::TestStatus		iterate										(void) override;
1899 };
1900 
RayTracingASDynamicIndexingTestCase(tcu::TestContext & context,const char * name)1901 RayTracingASDynamicIndexingTestCase::RayTracingASDynamicIndexingTestCase(tcu::TestContext& context, const char* name)
1902 	: TestCase(context, name, "")
1903 {
1904 }
1905 
checkSupport(Context & context) const1906 void RayTracingASDynamicIndexingTestCase::checkSupport(Context& context) const
1907 {
1908 	commonASTestsCheckSupport(context);
1909 	context.requireDeviceFunctionality("VK_EXT_descriptor_indexing");
1910 }
1911 
initPrograms(SourceCollections & programCollection) const1912 void RayTracingASDynamicIndexingTestCase::initPrograms(SourceCollections& programCollection) const
1913 {
1914 	const vk::SpirVAsmBuildOptions spvBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, true);
1915 	const vk::ShaderBuildOptions glslBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
1916 
1917 	// raygen shader is defined in spir-v as it requires passing pointer to TLAS that was read from ssbo;
1918 	// original spir-v code was generated using following glsl code but resulting spir-v code was modified
1919 	//
1920 	// #version 460 core
1921 	// #extension GL_EXT_ray_tracing : require
1922 	// #extension GL_EXT_nonuniform_qualifier : enable
1923 	// #define ARRAY_SIZE 500
1924 	// layout(location = 0) rayPayloadEXT uvec2 payload;	// offset and flag indicating if we are using descriptors or pointers
1925 
1926 	// layout(set = 0, binding = 0) uniform accelerationStructureEXT tlasArray[ARRAY_SIZE];
1927 	// layout(set = 0, binding = 1) readonly buffer topLevelASPointers {
1928 	//     uvec2 ptr[];
1929 	// } tlasPointers;
1930 	// layout(set = 0, binding = 2) readonly buffer topLevelASIndices {
1931 	//     uint idx[];
1932 	// } tlasIndices;
1933 	// layout(set = 0, binding = 3, std430) writeonly buffer Result {
1934 	//     uint value[];
1935 	// } result;
1936 
1937 	// void main()
1938 	// {
1939 	//   float tmin            = 0.0;\n"
1940 	//   float tmax            = 2.0;\n"
1941 	//   vec3  origin          = vec3(0.25f, 0.5f, 1.0);\n"
1942 	//   vec3  direction       = vec3(0.0,0.0,-1.0);\n"
1943 	//   uint  activeTlasIndex = gl_LaunchIDEXT.x;\n"
1944 	//   uint  activeTlasCount = gl_LaunchSizeEXT.x;\n"
1945 	//   uint  tlasIndex       = tlasIndices.idx[nonuniformEXT(activeTlasIndex)];\n"
1946 
1947 	//   atomicAdd(result.value[nonuniformEXT(activeTlasIndex)], 2);\n"
1948 	//   payload = uvec2(activeTlasIndex + activeTlasCount.x, 0);\n"
1949 	//   traceRayEXT(tlasArray[nonuniformEXT(tlasIndex)], gl_RayFlagsCullBackFacingTrianglesEXT, 0xFF, 0, 0, 0, origin, tmin, direction, tmax, 0);\n"
1950 
1951 	//   atomicAdd(result.value[nonuniformEXT(activeTlasIndex + activeTlasCount * 2)], 5);\n"
1952 	//   payload = uvec2(activeTlasIndex + activeTlasCount * 3, 1);\n"
1953 	//   traceRayEXT(tlasArray[nonuniformEXT(tlasIndex)], gl_RayFlagsCullBackFacingTrianglesEXT, 0xFF, 0, 0, 0, origin, tmin, direction, tmax, 0);				// used to generate initial spirv
1954 	//   //traceRayEXT(*tlasPointers.ptr[nonuniformEXT(tlasIndex)], gl_RayFlagsCullBackFacingTrianglesEXT, 0xFF, 0, 0, 0, origin, tmin, direction, tmax, 0);	// not available in glsl but should be done in spirv
1955 	// };
1956 
1957 	const std::string rgenSource =
1958 		"OpCapability RayTracingKHR\n"
1959 		"OpCapability ShaderNonUniform\n"
1960 		"OpExtension \"SPV_EXT_descriptor_indexing\"\n"
1961 		"OpExtension \"SPV_KHR_ray_tracing\"\n"
1962 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
1963 		"OpMemoryModel Logical GLSL450\n"
1964 		"OpEntryPoint RayGenerationKHR %4 \"main\" %27 %33 %var_tlas_indices %var_result %60 %var_as_arr_ptr %var_as_pointers_ssbo\n"
1965 		"OpDecorate %27 BuiltIn LaunchIdNV\n"
1966 		"OpDecorate %33 BuiltIn LaunchSizeNV\n"
1967 		"OpDecorate %37 ArrayStride 4\n"
1968 		"OpMemberDecorate %38 0 NonWritable\n"
1969 		"OpMemberDecorate %38 0 Offset 0\n"
1970 		"OpDecorate %38 Block\n"
1971 		"OpDecorate %var_tlas_indices DescriptorSet 0\n"
1972 		"OpDecorate %var_tlas_indices Binding 2\n"
1973 		"OpDecorate %44 NonUniform\n"
1974 		"OpDecorate %46 NonUniform\n"
1975 		"OpDecorate %47 NonUniform\n"
1976 		"OpDecorate %48 ArrayStride 4\n"
1977 		"OpMemberDecorate %49 0 NonReadable\n"
1978 		"OpMemberDecorate %49 0 Offset 0\n"
1979 		"OpDecorate %49 Block\n"
1980 		"OpDecorate %var_result DescriptorSet 0\n"
1981 		"OpDecorate %var_result Binding 3\n"
1982 		"OpDecorate %53 NonUniform\n"
1983 		"OpDecorate %60 Location 0\n"
1984 		"OpDecorate %var_as_arr_ptr DescriptorSet 0\n"
1985 		"OpDecorate %var_as_arr_ptr Binding 0\n"
1986 		"OpDecorate %71 NonUniform\n"
1987 		"OpDecorate %73 NonUniform\n"
1988 		"OpDecorate %74 NonUniform\n"
1989 		"OpDecorate %85 NonUniform\n"
1990 		"OpDecorate %as_index NonUniform\n"
1991 		"OpDecorate %as_device_addres NonUniform\n"
1992 		"OpDecorate %104 ArrayStride 8\n"
1993 		"OpMemberDecorate %105 0 NonWritable\n"
1994 		"OpMemberDecorate %105 0 Offset 0\n"
1995 		"OpDecorate %105 Block\n"
1996 		"OpDecorate %var_as_pointers_ssbo DescriptorSet 0\n"
1997 		"OpDecorate %var_as_pointers_ssbo Binding 1\n"
1998 		// types, constants and variables
1999 		"%2								= OpTypeVoid\n"
2000 		"%3								= OpTypeFunction %2\n"
2001 		"%6								= OpTypeFloat 32\n"
2002 		"%7								= OpTypePointer Function %6\n"
2003 		"%9								= OpConstant %6 0\n"
2004 		"%11							= OpConstant %6 2\n"
2005 		"%12							= OpTypeVector %6 3\n"
2006 		"%13							= OpTypePointer Function %12\n"
2007 		"%15							= OpConstant %6 0.25\n"
2008 		"%16							= OpConstant %6 0.5\n"
2009 		"%17							= OpConstant %6 1\n"
2010 		"%18							= OpConstantComposite %12 %15 %16 %17\n"
2011 		"%20							= OpConstant %6 -1\n"
2012 		"%21							= OpConstantComposite %12 %9 %9 %20\n"
2013 		"%type_uint32					= OpTypeInt 32 0\n"
2014 		"%23							= OpTypePointer Function %type_uint32\n"
2015 		"%25							= OpTypeVector %type_uint32 3\n"
2016 		"%26							= OpTypePointer Input %25\n"
2017 		"%27							= OpVariable %26 Input\n"
2018 		"%28							= OpConstant %type_uint32 0\n"
2019 		"%29							= OpTypePointer Input %type_uint32\n"
2020 		"%33							= OpVariable %26 Input\n"
2021 		"%37							= OpTypeRuntimeArray %type_uint32\n"
2022 		"%38							= OpTypeStruct %37\n"
2023 		"%39							= OpTypePointer StorageBuffer %38\n"
2024 		"%var_tlas_indices				= OpVariable %39 StorageBuffer\n"
2025 		"%type_int32					= OpTypeInt 32 1\n"
2026 		"%c_int32_0						= OpConstant %type_int32 0\n"
2027 		"%45							= OpTypePointer StorageBuffer %type_uint32\n"
2028 		"%48							= OpTypeRuntimeArray %type_uint32\n"
2029 		"%49							= OpTypeStruct %48\n"
2030 		"%50							= OpTypePointer StorageBuffer %49\n"
2031 		"%var_result					= OpVariable %50 StorageBuffer\n"
2032 		"%55							= OpConstant %type_uint32 2\n"
2033 		"%56							= OpConstant %type_uint32 1\n"
2034 		"%58							= OpTypeVector %type_uint32 2\n"
2035 		"%59							= OpTypePointer RayPayloadNV %58\n"
2036 		"%60							= OpVariable %59 RayPayloadNV\n"
2037 		"%type_as						= OpTypeAccelerationStructureKHR\n"
2038 		"%66							= OpConstant %type_uint32 500\n"
2039 		"%67							= OpTypeArray %type_as %66\n"
2040 		"%68							= OpTypePointer UniformConstant %67\n"
2041 		"%var_as_arr_ptr				= OpVariable %68 UniformConstant\n"
2042 		"%72							= OpTypePointer UniformConstant %type_as\n"
2043 		"%75							= OpConstant %type_uint32 16\n"
2044 		"%76							= OpConstant %type_uint32 255\n"
2045 		"%87							= OpConstant %type_uint32 5\n"
2046 		"%91							= OpConstant %type_uint32 3\n"
2047 
2048 		// <changed_section>
2049 		"%104							= OpTypeRuntimeArray %58\n"
2050 		"%105							= OpTypeStruct %104\n"
2051 		"%106							= OpTypePointer StorageBuffer %105\n"
2052 		"%var_as_pointers_ssbo			= OpVariable %106 StorageBuffer\n"
2053 		"%type_uint64_ssbo_ptr			= OpTypePointer StorageBuffer %58\n"
2054 		// </changed_section>
2055 
2056 		// void main()
2057 		"%4								= OpFunction %2 None %3\n"
2058 		"%5								= OpLabel\n"
2059 		"%8								= OpVariable %7 Function\n"
2060 		"%10							= OpVariable %7 Function\n"
2061 		"%14							= OpVariable %13 Function\n"
2062 		"%19							= OpVariable %13 Function\n"
2063 		"%24							= OpVariable %23 Function\n"
2064 		"%32							= OpVariable %23 Function\n"
2065 		"%36							= OpVariable %23 Function\n"
2066 		"OpStore %8 %9\n"
2067 		"OpStore %10 %11\n"
2068 		"OpStore %14 %18\n"
2069 		"OpStore %19 %21\n"
2070 		"%30							= OpAccessChain %29 %27 %28\n"
2071 		"%31							= OpLoad %type_uint32 %30\n"
2072 		"OpStore %24 %31\n"
2073 		"%34							= OpAccessChain %29 %33 %28\n"
2074 		"%35							= OpLoad %type_uint32 %34\n"
2075 		"OpStore %32 %35\n"
2076 		"%43							= OpLoad %type_uint32 %24\n"
2077 		"%44							= OpCopyObject %type_uint32 %43\n"
2078 		"%46							= OpAccessChain %45 %var_tlas_indices %c_int32_0 %44\n"
2079 		"%47							= OpLoad %type_uint32 %46\n"
2080 		"OpStore %36 %47\n"
2081 		// atomicAdd
2082 		"%52							= OpLoad %type_uint32 %24\n"
2083 		"%53							= OpCopyObject %type_uint32 %52\n"
2084 		"%54							= OpAccessChain %45 %var_result %c_int32_0 %53\n"
2085 		"%57							= OpAtomicIAdd %type_uint32 %54 %56 %28 %55\n"
2086 		// setup payload
2087 		"%61							= OpLoad %type_uint32 %24\n"
2088 		"%62							= OpLoad %type_uint32 %32\n"
2089 		"%63							= OpIAdd %type_uint32 %61 %62\n"
2090 		"%64							= OpCompositeConstruct %58 %63 %28\n"
2091 		"OpStore %60 %64\n"
2092 		// trace rays using tlas from array
2093 		"%70							= OpLoad %type_uint32 %36\n"
2094 		"%71							= OpCopyObject %type_uint32 %70\n"
2095 		"%73							= OpAccessChain %72 %var_as_arr_ptr %71\n"
2096 		"%74							= OpLoad %type_as %73\n"
2097 		"%77							= OpLoad %12 %14\n"
2098 		"%78							= OpLoad %6 %8\n"
2099 		"%79							= OpLoad %12 %19\n"
2100 		"%80							= OpLoad %6 %10\n"
2101 		"OpTraceRayKHR %74 %75 %76 %28 %28 %28 %77 %78 %79 %80 %60\n"
2102 		// atomicAdd
2103 		"%81							= OpLoad %type_uint32 %24\n"
2104 		"%82							= OpLoad %type_uint32 %32\n"
2105 		"%83							= OpIMul %type_uint32 %82 %55\n"
2106 		"%84							= OpIAdd %type_uint32 %81 %83\n"
2107 		"%85							= OpCopyObject %type_uint32 %84\n"
2108 		"%86							= OpAccessChain %45 %var_result %c_int32_0 %85\n"
2109 		"%88							= OpAtomicIAdd %type_uint32 %86 %56 %28 %87\n"
2110 		// setup payload
2111 		"%89							= OpLoad %type_uint32 %24\n"
2112 		"%90							= OpLoad %type_uint32 %32\n"
2113 		"%92							= OpIMul %type_uint32 %90 %91\n"
2114 		"%93							= OpIAdd %type_uint32 %89 %92\n"
2115 		"%94							= OpCompositeConstruct %58 %93 %56\n"
2116 		"OpStore %60 %94\n"
2117 		// trace rays using pointers to tlas
2118 		"%95							= OpLoad %type_uint32 %36\n"
2119 		"%as_index						= OpCopyObject %type_uint32 %95\n"
2120 
2121 		// <changed_section> OLD
2122 		"%as_device_addres_ptr			= OpAccessChain %type_uint64_ssbo_ptr %var_as_pointers_ssbo %c_int32_0 %as_index\n"
2123 		"%as_device_addres				= OpLoad %58 %as_device_addres_ptr\n"
2124 		"%as_to_use						= OpConvertUToAccelerationStructureKHR %type_as %as_device_addres\n"
2125 		// </changed_section>
2126 
2127 		"%99							= OpLoad %12 %14\n"
2128 		"%100							= OpLoad %6 %8\n"
2129 		"%101							= OpLoad %12 %19\n"
2130 		"%102							= OpLoad %6 %10\n"
2131 		"OpTraceRayKHR %as_to_use %75 %76 %28 %28 %28 %99 %100 %101 %102 %60\n"
2132 		"OpReturn\n"
2133 		"OpFunctionEnd\n";
2134 	programCollection.spirvAsmSources.add("rgen") << rgenSource << spvBuildOptions;
2135 
2136 	std::string chitSource =
2137 		"#version 460 core\n"
2138 		"#extension GL_EXT_ray_tracing : require\n"
2139 		"#extension GL_EXT_nonuniform_qualifier : enable\n"
2140 		"layout(location = 0) rayPayloadInEXT uvec2 payload;\n"
2141 		"\n"
2142 		"layout(set = 0, binding = 3) writeonly buffer Result {\n"
2143 		"    uint value[];\n"
2144 		"} result;\n"
2145 		"void main()\n"
2146 		"{\n"
2147 		     // payload.y is 0 or 1 so we will add 3 or 7 (just two prime numbers)
2148 		"    atomicAdd(result.value[nonuniformEXT(payload.x)], 3 + payload.y * 4);\n"
2149 		"}\n";
2150 	programCollection.glslSources.add("chit") << glu::ClosestHitSource(chitSource) << glslBuildOptions;
2151 }
2152 
createInstance(Context & context) const2153 TestInstance* RayTracingASDynamicIndexingTestCase::createInstance(Context& context) const
2154 {
2155 	return new RayTracingASDynamicIndexingTestInstance(context);
2156 }
2157 
RayTracingASDynamicIndexingTestInstance(Context & context)2158 RayTracingASDynamicIndexingTestInstance::RayTracingASDynamicIndexingTestInstance(Context& context)
2159 	: vkt::TestInstance(context)
2160 {
2161 }
2162 
iterate(void)2163 tcu::TestStatus RayTracingASDynamicIndexingTestInstance::iterate(void)
2164 {
2165 	const InstanceInterface&	vki							= m_context.getInstanceInterface();
2166 	const DeviceInterface&		vkd							= m_context.getDeviceInterface();
2167 	const VkDevice				device						= m_context.getDevice();
2168 	const VkPhysicalDevice		physicalDevice				= m_context.getPhysicalDevice();
2169 	const deUint32				queueFamilyIndex			= m_context.getUniversalQueueFamilyIndex();
2170 	const VkQueue				queue						= m_context.getUniversalQueue();
2171 	Allocator&					allocator					= m_context.getDefaultAllocator();
2172 	const deUint32				shaderGroupHandleSize		= getShaderGroupSize(vki, physicalDevice);
2173 	const deUint32				shaderGroupBaseAlignment	= getShaderGroupBaseAlignment(vki, physicalDevice);
2174 	const deUint32				tlasCount					= 500;	// changing this will require also changing shaders
2175 	const deUint32				activeTlasCount				= 32;	// number of tlas out of <tlasCount> that will be active
2176 
2177 	const Move<VkDescriptorSetLayout> descriptorSetLayout = DescriptorSetLayoutBuilder()
2178 		.addArrayBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, tlasCount, ALL_RAY_TRACING_STAGES)
2179 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, ALL_RAY_TRACING_STAGES)				// pointers to all acceleration structures
2180 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, ALL_RAY_TRACING_STAGES)				// ssbo with indices of all acceleration structures
2181 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, ALL_RAY_TRACING_STAGES)				// ssbo with result values
2182 		.build(vkd, device);
2183 
2184 	const Move<VkDescriptorPool> descriptorPool = DescriptorPoolBuilder()
2185 		.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, tlasCount)
2186 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2187 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2188 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2189 		.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2190 	const Move<VkDescriptorSet> descriptorSet = makeDescriptorSet(vkd, device, *descriptorPool, *descriptorSetLayout);
2191 
2192 	de::MovePtr<RayTracingPipeline> rayTracingPipeline = de::newMovePtr<RayTracingPipeline>();
2193 	rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,      createShaderModule(vkd, device, m_context.getBinaryCollection().get("rgen"), 0), 0);
2194 	rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, createShaderModule(vkd, device, m_context.getBinaryCollection().get("chit"), 0), 1);
2195 
2196 	const Move<VkPipelineLayout>			pipelineLayout						= makePipelineLayout(vkd, device, descriptorSetLayout.get());
2197 	Move<VkPipeline>						pipeline							= rayTracingPipeline->createPipeline(vkd, device, *pipelineLayout);
2198 	de::MovePtr<BufferWithMemory>			raygenShaderBindingTable			= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1);
2199 	de::MovePtr<BufferWithMemory>			hitShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1);
2200 
2201 	const VkStridedDeviceAddressRegionKHR	raygenShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, raygenShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
2202 	const VkStridedDeviceAddressRegionKHR	missShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
2203 	const VkStridedDeviceAddressRegionKHR	hitShaderBindingTableRegion			= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
2204 	const VkStridedDeviceAddressRegionKHR	callableShaderBindingTableRegion	= makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
2205 
2206 	const VkDeviceSize						pointerBufferSize		= tlasCount * sizeof(VkDeviceAddress);
2207 	const VkBufferCreateInfo				pointerBufferCreateInfo	= makeBufferCreateInfo(pointerBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2208 	de::MovePtr<BufferWithMemory>			pointerBuffer			= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, pointerBufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::DeviceAddress));
2209 
2210 	const VkDeviceSize						indicesBufferSize		= activeTlasCount * sizeof(deUint32);
2211 	const VkBufferCreateInfo				indicesBufferCreateInfo	= makeBufferCreateInfo(indicesBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2212 	de::MovePtr<BufferWithMemory>			indicesBuffer			= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, indicesBufferCreateInfo, MemoryRequirement::HostVisible));
2213 
2214 	const VkDeviceSize						resultBufferSize		= activeTlasCount * sizeof(deUint32) * 4;
2215 	const VkBufferCreateInfo				resultBufferCreateInfo	= makeBufferCreateInfo(resultBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
2216 	de::MovePtr<BufferWithMemory>			resultBuffer			= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible));
2217 
2218 	const Move<VkCommandPool>				cmdPool					= createCommandPool(vkd, device, 0, queueFamilyIndex);
2219 	const Move<VkCommandBuffer>				cmdBuffer				= allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2220 
2221 	de::SharedPtr<BottomLevelAccelerationStructure>				blas = de::SharedPtr<BottomLevelAccelerationStructure>(makeBottomLevelAccelerationStructure().release());
2222 	std::vector<de::MovePtr<TopLevelAccelerationStructure>>		tlasVect(tlasCount);
2223 	std::vector<VkDeviceAddress>								tlasPtrVect(tlasCount);
2224 	std::vector<VkAccelerationStructureKHR>						tlasVkVect;
2225 
2226 	// randomly scatter active AS across the range
2227 	deRandom rnd;
2228 	deRandom_init(&rnd, 123);
2229 	std::set<deUint32> asIndicesSet;
2230 	while (asIndicesSet.size() < activeTlasCount)
2231 		asIndicesSet.insert(deRandom_getUint32(&rnd) % tlasCount);
2232 
2233 	// fill indices buffer
2234 	deUint32 helperIndex = 0;
2235 	auto& indicesBufferAlloc	= indicesBuffer->getAllocation();
2236 	deUint32* indicesBufferPtr	= reinterpret_cast<deUint32*>(indicesBufferAlloc.getHostPtr());
2237 	std::for_each(asIndicesSet.begin(), asIndicesSet.end(),
2238 		[&helperIndex, indicesBufferPtr](const deUint32& index)
2239 		{
2240 			indicesBufferPtr[helperIndex++] = index;
2241 		});
2242 	vk::flushAlloc(vkd, device, indicesBufferAlloc);
2243 
2244 	// clear result buffer
2245 	auto& resultBufferAlloc		= resultBuffer->getAllocation();
2246 	void* resultBufferPtr		= resultBufferAlloc.getHostPtr();
2247 	deMemset(resultBufferPtr, 0, static_cast<size_t>(resultBufferSize));
2248 	vk::flushAlloc(vkd, device, resultBufferAlloc);
2249 
2250 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
2251 	{
2252 		// build bottom level acceleration structure
2253 		blas->setGeometryData(
2254 			{
2255 				{ 0.0, 0.0, 0.0 },
2256 				{ 1.0, 0.0, 0.0 },
2257 				{ 0.0, 1.0, 0.0 },
2258 			},
2259 			true,
2260 			VK_GEOMETRY_OPAQUE_BIT_KHR
2261 		);
2262 
2263 		blas->createAndBuild(vkd, device, *cmdBuffer, allocator);
2264 
2265 		// build top level acceleration structures
2266 		for (deUint32 tlasIndex = 0; tlasIndex < tlasCount; ++tlasIndex)
2267 		{
2268 			auto& tlas = tlasVect[tlasIndex];
2269 			tlas = makeTopLevelAccelerationStructure();
2270 			tlas->setInstanceCount(1);
2271 			tlas->addInstance(blas);
2272 			if (!asIndicesSet.count(tlasIndex))
2273 			{
2274 				// tlas that are not in asIndicesSet should be empty but it is hard to do
2275 				// that with current cts utils so we are marking them as inactive instead
2276 				tlas->setInactiveInstances(true);
2277 			}
2278 			tlas->createAndBuild(vkd, device, *cmdBuffer, allocator);
2279 
2280 			// get acceleration structure device address
2281 			const VkAccelerationStructureDeviceAddressInfoKHR addressInfo =
2282 			{
2283 				VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,	// VkStructureType				sType
2284 				DE_NULL,															// const void*					pNext
2285 				*tlas->getPtr()														// VkAccelerationStructureKHR	accelerationStructure
2286 			};
2287 			VkDeviceAddress vkda = vkd.getAccelerationStructureDeviceAddressKHR(device, &addressInfo);
2288 			tlasPtrVect[tlasIndex] = vkda;
2289 		}
2290 
2291 		// fill pointer buffer
2292 		vkd.cmdUpdateBuffer(*cmdBuffer, **pointerBuffer, 0, pointerBufferSize, tlasPtrVect.data());
2293 
2294 		// wait for data transfers
2295 		const VkMemoryBarrier bufferUploadBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
2296 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, &bufferUploadBarrier, 1u);
2297 
2298 		// wait for as build
2299 		const VkMemoryBarrier asBuildBarrier = makeMemoryBarrier(VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR);
2300 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, &asBuildBarrier, 1u);
2301 
2302 		tlasVkVect.reserve(tlasCount);
2303 		for (auto& tlas : tlasVect)
2304 			tlasVkVect.push_back(*tlas->getPtr());
2305 
2306 		VkWriteDescriptorSetAccelerationStructureKHR accelerationStructureWriteDescriptorSet =
2307 		{
2308 			VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	// VkStructureType						sType;
2309 			DE_NULL,															// const void*							pNext;
2310 			tlasCount,															// deUint32								accelerationStructureCount;
2311 			tlasVkVect.data(),													// const VkAccelerationStructureKHR*	pAccelerationStructures;
2312 		};
2313 
2314 		const vk::VkDescriptorBufferInfo pointerBufferInfo	= makeDescriptorBufferInfo(**pointerBuffer, 0u, VK_WHOLE_SIZE);
2315 		const vk::VkDescriptorBufferInfo indicesBufferInfo	= makeDescriptorBufferInfo(**indicesBuffer, 0u, VK_WHOLE_SIZE);
2316 		const vk::VkDescriptorBufferInfo resultInfo			= makeDescriptorBufferInfo(**resultBuffer,  0u, VK_WHOLE_SIZE);
2317 
2318 		DescriptorSetUpdateBuilder()
2319 			.writeArray (*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, tlasCount, &accelerationStructureWriteDescriptorSet)
2320 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &pointerBufferInfo)
2321 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indicesBufferInfo)
2322 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(3u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultInfo)
2323 			.update(vkd, device);
2324 
2325 		vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL);
2326 
2327 		vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
2328 
2329 		cmdTraceRays(vkd,
2330 			*cmdBuffer,
2331 			&raygenShaderBindingTableRegion,
2332 			&missShaderBindingTableRegion,
2333 			&hitShaderBindingTableRegion,
2334 			&callableShaderBindingTableRegion,
2335 			activeTlasCount, 1, 1);
2336 
2337 		const VkMemoryBarrier postTraceMemoryBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
2338 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_TRANSFER_BIT, &postTraceMemoryBarrier);
2339 	}
2340 	endCommandBuffer(vkd, *cmdBuffer);
2341 
2342 	submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
2343 
2344 	invalidateMappedMemoryRange(vkd, device, resultBuffer->getAllocation().getMemory(), resultBuffer->getAllocation().getOffset(), resultBufferSize);
2345 
2346 	// verify result buffer
2347 	deUint32		failures	= 0;
2348 	const deUint32*	resultPtr	= reinterpret_cast<deUint32*>(resultBuffer->getAllocation().getHostPtr());
2349 	for (deUint32 index = 0; index < activeTlasCount; ++index)
2350 	{
2351 		failures += (resultPtr[0 * activeTlasCount + index] != 2) +
2352 					(resultPtr[1 * activeTlasCount + index] != 3) +
2353 					(resultPtr[2 * activeTlasCount + index] != 5) +
2354 					(resultPtr[3 * activeTlasCount + index] != 7);
2355 	}
2356 
2357 	if (failures)
2358 		return tcu::TestStatus::fail(de::toString(failures) + " failures, " + de::toString(4 * activeTlasCount - failures) + " are ok");
2359 	return tcu::TestStatus::pass("Pass");
2360 }
2361 
2362 // Tests the vkGetDeviceAccelerationStructureKHR routine
2363 class RayTracingDeviceASCompabilityKHRTestInstance : public TestInstance
2364 {
2365 public:
RayTracingDeviceASCompabilityKHRTestInstance(Context & context,const de::SharedPtr<TestParams> params)2366 					RayTracingDeviceASCompabilityKHRTestInstance	(Context& context, const de::SharedPtr<TestParams> params)
2367 						: TestInstance	(context)
2368 						, m_params		(params)
2369 					{
2370 					}
2371 
2372 	tcu::TestStatus	iterate											(void) override;
2373 
2374 protected:
2375 	template<class ASType>
2376 		bool		performTest										(VkCommandPool								cmdPool,
2377 																	 VkCommandBuffer							cmdBuffer,
2378 																	 const std::vector<de::SharedPtr<ASType>>	sourceStructures,
2379 																	 const std::vector<VkDeviceSize>&			copySizes,
2380 																	 const std::vector<VkDeviceSize>&			compactSizes);
2381 
2382 	VkAccelerationStructureCompatibilityKHR
2383 					getDeviceASCompatibilityKHR						(const deUint8*		versionInfoData);
2384 	std::string		getUUIDsString									(const deUint8* header) const;
2385 
2386 
2387 private:
2388 	const de::SharedPtr<TestParams>	m_params;
2389 };
2390 
2391 // Tests for updating botto-level AS(s) address(es) in top-level AS's header
2392 class RayTracingHeaderBottomAddressTestInstance : public TestInstance
2393 {
2394 public:
RayTracingHeaderBottomAddressTestInstance(Context & context,const de::SharedPtr<TestParams> params)2395 					RayTracingHeaderBottomAddressTestInstance						(Context&											context,
2396 																					 const de::SharedPtr<TestParams>					params)
2397 						: TestInstance	(context)
2398 						, m_params		(params)
2399 					{
2400 					}
2401 	tcu::TestStatus	iterate															(void) override;
2402 
2403 protected:
2404 	de::SharedPtr<TopLevelAccelerationStructure>	prepareTopAccelerationStructure	(const DeviceInterface&								vk,
2405 																					 VkDevice											device,
2406 																					 Allocator&											allocator,
2407 																					 VkCommandBuffer									cmdBuffer);
2408 
2409 	bool											areAddressesTheSame				(const std::vector<deUint64>&						addresses,
2410 																					 const SerialStorage::AccelerationStructureHeader*	header);
2411 
2412 	bool											areAddressesDifferent			(const std::vector<deUint64>&						addresses1,
2413 																					 const std::vector<deUint64>&						addresses2);
2414 private:
2415 	const de::SharedPtr<TestParams>	m_params;
2416 };
2417 
2418 class RayTracingDeviceASCompabilityKHRTestCase : public TestCase
2419 {
2420 public:
RayTracingDeviceASCompabilityKHRTestCase(tcu::TestContext & ctx,const char * name,const de::SharedPtr<TestParams> params)2421 					RayTracingDeviceASCompabilityKHRTestCase	(tcu::TestContext& ctx, const char* name, const de::SharedPtr<TestParams> params)
2422 						: TestCase(ctx, name, std::string())
2423 						, m_params(params)
2424 					{
2425 					}
2426 
2427 	void			checkSupport								(Context&			context) const override;
createInstance(Context & context) const2428 	TestInstance*	createInstance								(Context&			context) const override
2429 	{
2430 		return new RayTracingDeviceASCompabilityKHRTestInstance(context, m_params);
2431 	}
2432 
2433 private:
2434 	de::SharedPtr<TestParams>	m_params;
2435 };
2436 
2437 class RayTracingHeaderBottomAddressTestCase : public TestCase
2438 {
2439 public:
RayTracingHeaderBottomAddressTestCase(tcu::TestContext & ctx,const char * name,const de::SharedPtr<TestParams> params)2440 					RayTracingHeaderBottomAddressTestCase	(tcu::TestContext& ctx, const char* name, const de::SharedPtr<TestParams> params)
2441 						: TestCase(ctx, name, std::string())
2442 						, m_params(params)
2443 					{
2444 					}
2445 
2446 	void			checkSupport								(Context&			context) const override;
createInstance(Context & context) const2447 	TestInstance*	createInstance								(Context&			context) const override
2448 	{
2449 		return new RayTracingHeaderBottomAddressTestInstance(context, m_params);
2450 	}
2451 
2452 private:
2453 	de::SharedPtr<TestParams>	m_params;
2454 };
2455 
checkSupport(Context & context) const2456 void RayTracingDeviceASCompabilityKHRTestCase ::checkSupport (Context& context) const
2457 {
2458 	context.requireInstanceFunctionality("VK_KHR_get_physical_device_properties2");
2459 	context.requireDeviceFunctionality("VK_KHR_acceleration_structure");
2460 
2461 	const VkPhysicalDeviceAccelerationStructureFeaturesKHR&	accelerationStructureFeaturesKHR = context.getAccelerationStructureFeatures();
2462 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR && accelerationStructureFeaturesKHR.accelerationStructureHostCommands == DE_FALSE)
2463 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructureHostCommands");
2464 
2465 	// Check supported vertex format.
2466 	checkAccelerationStructureVertexBufferFormat(context.getInstanceInterface(), context.getPhysicalDevice(), m_params->vertexFormat);
2467 }
2468 
checkSupport(Context & context) const2469 void RayTracingHeaderBottomAddressTestCase ::checkSupport (Context& context) const
2470 {
2471 	context.requireDeviceFunctionality("VK_KHR_acceleration_structure");
2472 
2473 	const VkPhysicalDeviceAccelerationStructureFeaturesKHR&	accelerationStructureFeaturesKHR = context.getAccelerationStructureFeatures();
2474 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR && accelerationStructureFeaturesKHR.accelerationStructureHostCommands == DE_FALSE)
2475 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructureHostCommands");
2476 
2477 	// Check supported vertex format.
2478 	checkAccelerationStructureVertexBufferFormat(context.getInstanceInterface(), context.getPhysicalDevice(), m_params->vertexFormat);
2479 }
2480 
getDeviceASCompatibilityKHR(const deUint8 * versionInfoData)2481 VkAccelerationStructureCompatibilityKHR	RayTracingDeviceASCompabilityKHRTestInstance::getDeviceASCompatibilityKHR (const deUint8* versionInfoData)
2482 {
2483 	const VkDevice								device		= m_context.getDevice();
2484 	const DeviceInterface&						vkd			= m_context.getDeviceInterface();
2485 
2486 	VkAccelerationStructureCompatibilityKHR		compability = VK_ACCELERATION_STRUCTURE_COMPATIBILITY_MAX_ENUM_KHR;
2487 
2488 	const VkAccelerationStructureVersionInfoKHR versionInfo =
2489 	{
2490 		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_VERSION_INFO_KHR,	// sType
2491 		DE_NULL,													// pNext
2492 		versionInfoData												// pVersionData
2493 	};
2494 
2495 	vkd.getDeviceAccelerationStructureCompatibilityKHR(device, &versionInfo, &compability);
2496 
2497 	return compability;
2498 }
2499 
getUUIDsString(const deUint8 * header) const2500 std::string RayTracingDeviceASCompabilityKHRTestInstance::getUUIDsString (const deUint8* header) const
2501 {
2502 	std::stringstream		ss;
2503 
2504 	int			offset		= 0;
2505 	const int	widths[]	= { 4, 2, 2, 2, 6 };
2506 
2507 	for (int h = 0; h < 2; ++h)
2508 	{
2509 		if (h) ss << ' ';
2510 
2511 		for (int w = 0; w < DE_LENGTH_OF_ARRAY(widths); ++w)
2512 		{
2513 			if (w) ss << '-';
2514 
2515 			for (int i = 0; i < widths[w]; ++i)
2516 				ss << std::hex << std::uppercase << static_cast<int>(header[i + offset]);
2517 
2518 			offset += widths[w];
2519 		}
2520 	}
2521 
2522 	return ss.str();
2523 }
2524 
iterate(void)2525 tcu::TestStatus RayTracingDeviceASCompabilityKHRTestInstance::iterate (void)
2526 {
2527 	const DeviceInterface&			vkd					= m_context.getDeviceInterface();
2528 	const VkDevice					device				= m_context.getDevice();
2529 	const deUint32					queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
2530 	const VkQueue					queue				= m_context.getUniversalQueue();
2531 	Allocator&						allocator			= m_context.getDefaultAllocator();
2532 
2533 	const Move<VkCommandPool>		cmdPool				= createCommandPool(vkd, device, 0, queueFamilyIndex);
2534 	const Move<VkCommandBuffer>		cmdBuffer			= allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2535 
2536 	bool							result				= false;
2537 
2538 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	bottomStructures;
2539 	std::vector<VkAccelerationStructureKHR>							bottomHandles;
2540 	std::vector<de::SharedPtr<TopLevelAccelerationStructure>>		topStructures;
2541 	std::vector<VkAccelerationStructureKHR>							topHandles;
2542 	Move<VkQueryPool>												queryPoolCompact;
2543 	Move<VkQueryPool>												queryPoolSerial;
2544 	std::vector<VkDeviceSize>										compactSizes;
2545 	std::vector<VkDeviceSize>										serialSizes;
2546 
2547 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
2548 
2549 	bottomStructures = m_params->testConfiguration->initBottomAccelerationStructures(m_context, *m_params);
2550 	for (auto& blas : bottomStructures)
2551 	{
2552 		blas->setBuildType(m_params->buildType);
2553 		blas->setBuildFlags(VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR);
2554 		blas->createAndBuild(vkd, device, *cmdBuffer, allocator);
2555 		bottomHandles.push_back(*(blas->getPtr()));
2556 	}
2557 
2558 	if (m_params->operationTarget == OT_TOP_ACCELERATION)
2559 	{
2560 		de::MovePtr<TopLevelAccelerationStructure> tlas = m_params->testConfiguration->initTopAccelerationStructure(m_context, *m_params, bottomStructures);
2561 		tlas->setBuildType					(m_params->buildType);
2562 		tlas->setBuildFlags				(VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR);
2563 		tlas->createAndBuild				(vkd, device, *cmdBuffer, allocator);
2564 		topHandles.push_back							(*(tlas->getPtr()));
2565 		topStructures.push_back(de::SharedPtr<TopLevelAccelerationStructure>(tlas.release()));
2566 	}
2567 
2568 	const deUint32 queryCount = deUint32((m_params->operationTarget == OT_BOTTOM_ACCELERATION) ? bottomStructures.size() : topStructures.size());
2569 	const std::vector<VkAccelerationStructureKHR>& handles = (m_params->operationTarget == OT_BOTTOM_ACCELERATION) ? bottomHandles : topHandles;
2570 
2571 	// query compact size
2572 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2573 		queryPoolCompact = makeQueryPool(vkd, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, queryCount);
2574 	queryAccelerationStructureSize(vkd, device, *cmdBuffer, handles, m_params->buildType, *queryPoolCompact, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, 0u, compactSizes);
2575 
2576 	// query serialization size
2577 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2578 		queryPoolSerial = makeQueryPool(vkd, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, queryCount);
2579 	queryAccelerationStructureSize(vkd, device, *cmdBuffer, handles, m_params->buildType, queryPoolSerial.get(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, serialSizes);
2580 
2581 	endCommandBuffer(vkd, *cmdBuffer);
2582 	submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
2583 
2584 	if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2585 	{
2586 		VK_CHECK(vkd.getQueryPoolResults(device, *queryPoolCompact, 0u, queryCount, queryCount * sizeof(VkDeviceSize), compactSizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
2587 		VK_CHECK(vkd.getQueryPoolResults(device, *queryPoolSerial, 0u, queryCount, queryCount * sizeof(VkDeviceSize), serialSizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
2588 
2589 		vkd.resetCommandPool(device, *cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
2590 	}
2591 
2592 	if (m_params->operationTarget == OT_BOTTOM_ACCELERATION)
2593 		result = performTest<BottomLevelAccelerationStructure>(*cmdPool, *cmdBuffer, bottomStructures, compactSizes, serialSizes);
2594 	else
2595 		result = performTest<TopLevelAccelerationStructure>(*cmdPool, *cmdBuffer, topStructures, compactSizes, serialSizes);
2596 
2597 	return result ? tcu::TestStatus::pass("") : tcu::TestStatus::fail("");
2598 }
2599 
2600 template<class ASType>
performTest(VkCommandPool cmdPool,VkCommandBuffer cmdBuffer,const std::vector<de::SharedPtr<ASType>> sourceStructures,const std::vector<VkDeviceSize> & compactSizes,const std::vector<VkDeviceSize> & serialSizes)2601 bool RayTracingDeviceASCompabilityKHRTestInstance::performTest (VkCommandPool								cmdPool,
2602 																VkCommandBuffer								cmdBuffer,
2603 																const std::vector<de::SharedPtr<ASType>>	sourceStructures,
2604 																const std::vector<VkDeviceSize>&			compactSizes,
2605 																const std::vector<VkDeviceSize>&			serialSizes)
2606 {
2607 	const VkQueue								queue					= m_context.getUniversalQueue();
2608 	const VkDevice								device					= m_context.getDevice();
2609 	const DeviceInterface&						vkd						= m_context.getDeviceInterface();
2610 	Allocator&									allocator				= m_context.getDefaultAllocator();
2611 
2612 	const deUint32								sourceStructuresCount	= deUint32(sourceStructures.size());
2613 
2614 	Move<VkQueryPool>							queryPoolCompactSerial;
2615 	std::vector<VkDeviceSize>					compactSerialSizes;
2616 
2617 	std::vector<VkAccelerationStructureKHR>		compactHandles;
2618 	std::vector<de::SharedPtr<ASType>>			compactStructures;
2619 
2620 	std::vector<de::SharedPtr<SerialStorage>>	sourceSerialized;
2621 	std::vector<de::SharedPtr<SerialStorage>>	compactSerialized;
2622 
2623 
2624 	// make compact copy of acceleration structure
2625 	{
2626 		beginCommandBuffer(vkd, cmdBuffer, 0u);
2627 
2628 		for (size_t i = 0; i < sourceStructuresCount; ++i)
2629 		{
2630 			de::MovePtr<ASType> asCopy = makeAccelerationStructure<ASType>();
2631 			asCopy->setBuildType(m_params->buildType);
2632 			asCopy->createAndCopyFrom(vkd, device, cmdBuffer, allocator, sourceStructures[i].get(), compactSizes[i], 0u);
2633 			compactHandles.push_back(*(asCopy->getPtr()));
2634 			compactStructures.push_back(de::SharedPtr<ASType>(asCopy.release()));
2635 		}
2636 
2637 		// query serialization size of compact acceleration structures
2638 		if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2639 			queryPoolCompactSerial = makeQueryPool(vkd, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, sourceStructuresCount);
2640 		queryAccelerationStructureSize(vkd, device, cmdBuffer, compactHandles, m_params->buildType, *queryPoolCompactSerial, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, compactSerialSizes);
2641 
2642 		endCommandBuffer(vkd, cmdBuffer);
2643 		submitCommandsAndWait(vkd, device, queue, cmdBuffer);
2644 
2645 		if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2646 		{
2647 			VK_CHECK(vkd.getQueryPoolResults(device, *queryPoolCompactSerial, 0u, sourceStructuresCount, (sourceStructuresCount * sizeof(VkDeviceSize)), compactSerialSizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
2648 			vkd.resetCommandPool(device, cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
2649 		}
2650 	}
2651 
2652 	// serialize both structures to memory
2653 	{
2654 		beginCommandBuffer(vkd, cmdBuffer, 0u);
2655 
2656 		for (size_t i = 0 ; i < sourceStructuresCount; ++i)
2657 		{
2658 			sourceSerialized.push_back(de::SharedPtr<SerialStorage>(new SerialStorage(vkd, device, allocator, m_params->buildType, serialSizes[i])));
2659 			sourceStructures[i]->serialize(vkd, device, cmdBuffer, sourceSerialized.back().get());
2660 
2661 			compactSerialized.push_back(de::SharedPtr<SerialStorage>(new SerialStorage(vkd, device, allocator, m_params->buildType, compactSerialSizes[i])));
2662 			compactStructures[i]->serialize(vkd, device, cmdBuffer, compactSerialized.back().get());
2663 		}
2664 
2665 		endCommandBuffer(vkd, cmdBuffer);
2666 		submitCommandsAndWait(vkd, device, queue, cmdBuffer);
2667 	}
2668 
2669 	// verify compatibility
2670 	bool result = true;
2671 	for (size_t i = 0; result && (i < sourceStructuresCount); ++i)
2672 	{
2673 		const deUint8* s_header = static_cast<const deUint8*>(sourceSerialized[i]->getHostAddressConst().hostAddress);
2674 		const deUint8* c_header = static_cast<const deUint8*>(compactSerialized[i]->getHostAddressConst().hostAddress);
2675 
2676 		const auto s_compability = getDeviceASCompatibilityKHR(s_header);
2677 		const auto c_compability = getDeviceASCompatibilityKHR(c_header);
2678 
2679 		result &= ((s_compability == c_compability) && (s_compability == VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR));
2680 
2681 		if (!result)
2682 		{
2683 			tcu::TestLog& log = m_context.getTestContext().getLog();
2684 
2685 			log << tcu::TestLog::Message << getUUIDsString(s_header) << " serialized AS compability failed" << tcu::TestLog::EndMessage;
2686 			log << tcu::TestLog::Message << getUUIDsString(c_header) << " compact AS compability failed" << tcu::TestLog::EndMessage;
2687 		}
2688 	}
2689 
2690 	return result;
2691 }
2692 
2693 de::SharedPtr<TopLevelAccelerationStructure>
prepareTopAccelerationStructure(const DeviceInterface & vk,VkDevice device,Allocator & allocator,VkCommandBuffer cmdBuffer)2694 RayTracingHeaderBottomAddressTestInstance::prepareTopAccelerationStructure (const DeviceInterface&	vk,
2695 																			VkDevice				device,
2696 																			Allocator&				allocator,
2697 																			VkCommandBuffer			cmdBuffer)
2698 {
2699 	const std::vector<tcu::Vec3>									geometryData =
2700 	{
2701 		{ 0.0, 0.0, 0.0 },
2702 		{ 1.0, 0.0, 0.0 },
2703 		{ 0.0, 1.0, 0.0 },
2704 	};
2705 
2706 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	bottoms;
2707 
2708 	if (TTT_IDENTICAL_INSTANCES == m_params->topTestType)
2709 	{
2710 		auto blas = de::SharedPtr<BottomLevelAccelerationStructure>(makeBottomLevelAccelerationStructure().release());
2711 		blas->setBuildType(m_params->buildType);
2712 		blas->setGeometryData(geometryData, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
2713 		blas->createAndBuild(vk, device, cmdBuffer, allocator);
2714 		for (deUint32 i = 0; i < m_params->width; ++i)
2715 		{
2716 			bottoms.emplace_back(blas);
2717 		}
2718 	}
2719 	else if (TTT_DIFFERENT_INSTANCES == m_params->topTestType)
2720 	{
2721 		for (deUint32 i = 0; i < m_params->width; ++i)
2722 		{
2723 			auto blas = de::SharedPtr<BottomLevelAccelerationStructure>(makeBottomLevelAccelerationStructure().release());
2724 			blas->setBuildType(m_params->buildType);
2725 			blas->setGeometryData(geometryData, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
2726 			blas->createAndBuild(vk, device, cmdBuffer, allocator);
2727 			bottoms.emplace_back(blas);
2728 		}
2729 	}
2730 	else // TTT_MIX_INSTANCES == m_params->topTestType
2731 	{
2732 		for (deUint32 i = 0; i < m_params->width; ++i)
2733 		{
2734 			{
2735 				auto blas1 = de::SharedPtr<BottomLevelAccelerationStructure>(makeBottomLevelAccelerationStructure().release());
2736 				blas1->setBuildType(m_params->buildType);
2737 				blas1->setGeometryData(geometryData, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
2738 				blas1->createAndBuild(vk, device, cmdBuffer, allocator);
2739 				bottoms.emplace_back(blas1);
2740 			}
2741 
2742 			{
2743 				auto blas2 = de::SharedPtr<BottomLevelAccelerationStructure>(makeBottomLevelAccelerationStructure().release());
2744 				blas2->setBuildType(m_params->buildType);
2745 				blas2->setGeometryData(geometryData, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
2746 				blas2->createAndBuild(vk, device, cmdBuffer, allocator);
2747 				bottoms.emplace_back(blas2);
2748 			}
2749 		}
2750 
2751 	}
2752 
2753 	const std::size_t												instanceCount = bottoms.size();
2754 
2755 	de::MovePtr<TopLevelAccelerationStructure>						tlas = makeTopLevelAccelerationStructure();
2756 	tlas->setBuildType(m_params->buildType);
2757 	tlas->setInstanceCount(instanceCount);
2758 
2759 	for (std::size_t i = 0; i < instanceCount; ++i)
2760 	{
2761 		const VkTransformMatrixKHR	transformMatrixKHR =
2762 		{
2763 			{	//  float	matrix[3][4];
2764 				{ 1.0f, 0.0f, 0.0f, (float)i },
2765 				{ 0.0f, 1.0f, 0.0f, (float)i },
2766 				{ 0.0f, 0.0f, 1.0f, 0.0f },
2767 			}
2768 		};
2769 		tlas->addInstance(bottoms[i], transformMatrixKHR, 0, 0xFFu, 0u, getCullFlags((m_params->cullFlags)));
2770 	}
2771 
2772 	tlas->createAndBuild(vk, device, cmdBuffer, allocator);
2773 
2774 	return de::SharedPtr<TopLevelAccelerationStructure>(tlas.release());
2775 }
2776 
iterate(void)2777 tcu::TestStatus RayTracingHeaderBottomAddressTestInstance::iterate (void)
2778 {
2779 	const DeviceInterface&								vkd				= m_context.getDeviceInterface();
2780 	const VkDevice										device			= m_context.getDevice();
2781 	const deUint32										familyIndex		= m_context.getUniversalQueueFamilyIndex();
2782 	const VkQueue										queue			= m_context.getUniversalQueue();
2783 	Allocator&											allocator		= m_context.getDefaultAllocator();
2784 
2785 	const Move<VkCommandPool>							cmdPool			= createCommandPool(vkd, device, 0, familyIndex);
2786 	const Move<VkCommandBuffer>							cmdBuffer		= allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2787 
2788 	beginCommandBuffer(vkd, *cmdBuffer, 0);
2789 	de::SharedPtr<TopLevelAccelerationStructure>		src				= prepareTopAccelerationStructure(vkd, device, allocator, *cmdBuffer);
2790 	endCommandBuffer(vkd, *cmdBuffer);
2791 	submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
2792 
2793 	de::MovePtr<TopLevelAccelerationStructure>			dst				= makeTopLevelAccelerationStructure();
2794 
2795 	const std::vector<deUint64>							inAddrs			= src->getSerializingAddresses(vkd, device);
2796 	const std::vector<VkDeviceSize>						inSizes			= src->getSerializingSizes(vkd, device, queue, familyIndex);
2797 
2798 	const SerialInfo									serialInfo		(inAddrs, inSizes);
2799 	SerialStorage										deepStorage		(vkd, device, allocator, m_params->buildType, serialInfo);
2800 
2801 	// make deep serialization - top-level AS width bottom-level structures that it owns
2802 	beginCommandBuffer(vkd, *cmdBuffer, 0);
2803 	src->serialize(vkd, device, *cmdBuffer, &deepStorage);
2804 	endCommandBuffer(vkd, *cmdBuffer);
2805 	submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
2806 
2807 	// deserialize all from the previous step to a new top-level AS
2808 	// bottom-level structure addresses should be updated when deep data is deserialized
2809 	beginCommandBuffer(vkd, *cmdBuffer, 0);
2810 	dst->createAndDeserializeFrom(vkd, device, *cmdBuffer, allocator, &deepStorage);
2811 	endCommandBuffer(vkd, *cmdBuffer);
2812 	submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
2813 
2814 	SerialStorage										shallowStorage	(vkd, device, allocator, m_params->buildType, inSizes[0]);
2815 
2816 	// make shallow serialization - only top-level AS without bottom-level structures
2817 	beginCommandBuffer(vkd, *cmdBuffer, 0);
2818 	dst->serialize(vkd, device, *cmdBuffer, &shallowStorage);
2819 	endCommandBuffer(vkd, *cmdBuffer);
2820 	submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
2821 
2822 	// get data to verification
2823 	const std::vector<deUint64>							outAddrs		= dst->getSerializingAddresses(vkd, device);
2824 	const SerialStorage::AccelerationStructureHeader*	header			= shallowStorage.getASHeader();
2825 
2826 	return (areAddressesDifferent(inAddrs, outAddrs) && areAddressesTheSame(outAddrs, header)) ? tcu::TestStatus::pass("") : tcu::TestStatus::fail("");
2827 }
2828 
areAddressesTheSame(const std::vector<deUint64> & addresses,const SerialStorage::AccelerationStructureHeader * header)2829 bool RayTracingHeaderBottomAddressTestInstance::areAddressesTheSame (const std::vector<deUint64>& addresses, const SerialStorage::AccelerationStructureHeader* header)
2830 {
2831 	const deUint32 cbottoms = deUint32(addresses.size() - 1);
2832 
2833 	// header should contain the same number of handles as serialized/deserialized top-level AS
2834 	if (cbottoms != header->handleCount) return false;
2835 
2836 	std::set<deUint64> refAddrs;
2837 	std::set<deUint64> checkAddrs;
2838 
2839 	// distinct, squach and sort address list
2840 	for (deUint32 i = 0; i < cbottoms; ++i)
2841 	{
2842 		refAddrs.insert(addresses[i+1]);
2843 		checkAddrs.insert(header->handleArray[i]);
2844 	}
2845 
2846 	return std::equal(refAddrs.begin(), refAddrs.end(), checkAddrs.begin());
2847 }
2848 
areAddressesDifferent(const std::vector<deUint64> & addresses1,const std::vector<deUint64> & addresses2)2849 bool RayTracingHeaderBottomAddressTestInstance::areAddressesDifferent (const std::vector<deUint64>& addresses1, const std::vector<deUint64>& addresses2)
2850 {
2851 	// the number of addresses must be equal
2852 	if (addresses1.size() != addresses2.size())
2853 		return false;
2854 
2855 	// adresses of top-level AS must differ
2856 	if (addresses1[0] == addresses2[0])
2857 		return false;
2858 
2859 	std::set<deUint64>	addrs1;
2860 	std::set<deUint64>	addrs2;
2861 	deUint32			matches		= 0;
2862 	const deUint32		cbottoms	= deUint32(addresses1.size() - 1);
2863 
2864 	for (deUint32 i = 0; i < cbottoms; ++i)
2865 	{
2866 		addrs1.insert(addresses1[i+1]);
2867 		addrs2.insert(addresses2[i+1]);
2868 	}
2869 
2870 	// the first addresses set must not contain any address from the second addresses set
2871 	for (auto& addr1 : addrs1)
2872 	{
2873 		if (addrs2.end() != addrs2.find(addr1))
2874 			++matches;
2875 	}
2876 
2877 	return (matches == 0);
2878 }
2879 
2880 }	// anonymous
2881 
addBasicBuildingTests(tcu::TestCaseGroup * group)2882 void addBasicBuildingTests(tcu::TestCaseGroup* group)
2883 {
2884 	struct
2885 	{
2886 		vk::VkAccelerationStructureBuildTypeKHR	buildType;
2887 		const char*								name;
2888 	} buildTypes[] =
2889 	{
2890 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,				"cpu_built"	},
2891 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,				"gpu_built"	},
2892 	};
2893 
2894 	struct
2895 	{
2896 		BottomTestType							testType;
2897 		bool									usesAOP;
2898 		const char*								name;
2899 	} bottomTestTypes[] =
2900 	{
2901 		{ BTT_TRIANGLES,	false,										"triangles" },
2902 		{ BTT_TRIANGLES,	true,										"triangles_aop" },
2903 		{ BTT_AABBS,		false,										"aabbs" },
2904 		{ BTT_AABBS,		true,										"aabbs_aop" },
2905 	};
2906 
2907 	struct
2908 	{
2909 		TopTestType								testType;
2910 		bool									usesAOP;
2911 		const char*								name;
2912 	} topTestTypes[] =
2913 	{
2914 		{ TTT_IDENTICAL_INSTANCES,	false,								"identical_instances" },
2915 		{ TTT_IDENTICAL_INSTANCES,	true,								"identical_instances_aop" },
2916 		{ TTT_DIFFERENT_INSTANCES,	false,								"different_instances" },
2917 		{ TTT_DIFFERENT_INSTANCES,	true,								"different_instances_aop" },
2918 	};
2919 
2920 	struct BuildFlagsData
2921 	{
2922 		VkBuildAccelerationStructureFlagsKHR	flags;
2923 		const char*								name;
2924 	};
2925 
2926 	BuildFlagsData optimizationTypes[] =
2927 	{
2928 		{ VkBuildAccelerationStructureFlagsKHR(0u),						"0" },
2929 		{ VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR,	"fasttrace" },
2930 		{ VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR,	"fastbuild" },
2931 	};
2932 
2933 	BuildFlagsData updateTypes[] =
2934 	{
2935 		{ VkBuildAccelerationStructureFlagsKHR(0u),						"0" },
2936 		{ VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR,			"update" },
2937 	};
2938 
2939 	BuildFlagsData compactionTypes[] =
2940 	{
2941 		{ VkBuildAccelerationStructureFlagsKHR(0u),						"0" },
2942 		{ VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR,		"compaction" },
2943 	};
2944 
2945 	BuildFlagsData lowMemoryTypes[] =
2946 	{
2947 		{ VkBuildAccelerationStructureFlagsKHR(0u),						"0" },
2948 		{ VK_BUILD_ACCELERATION_STRUCTURE_LOW_MEMORY_BIT_KHR,			"lowmemory" },
2949 	};
2950 
2951 	struct
2952 	{
2953 		bool		padVertices;
2954 		const char*	name;
2955 	} paddingType[] =
2956 	{
2957 		{ false,	"nopadding"	},
2958 		{ true,		"padded"	},
2959 	};
2960 
2961 	struct
2962 	{
2963 		bool		topGeneric;
2964 		bool		bottomGeneric;
2965 		const char*	suffix;
2966 	} createGenericParams[] =
2967 	{
2968 		{	false,	false,	""					},
2969 		{	false,	true,	"_bottomgeneric"	},
2970 		{	true,	false,	"_topgeneric"		},
2971 		{	true,	true,	"_bothgeneric"		},
2972 	};
2973 
2974 	for (size_t buildTypeNdx = 0; buildTypeNdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeNdx)
2975 	{
2976 		de::MovePtr<tcu::TestCaseGroup> buildGroup(new tcu::TestCaseGroup(group->getTestContext(), buildTypes[buildTypeNdx].name, ""));
2977 
2978 		for (size_t bottomNdx = 0; bottomNdx < DE_LENGTH_OF_ARRAY(bottomTestTypes); ++bottomNdx)
2979 		{
2980 			de::MovePtr<tcu::TestCaseGroup> bottomGroup(new tcu::TestCaseGroup(group->getTestContext(), bottomTestTypes[bottomNdx].name, ""));
2981 
2982 			for (size_t topNdx = 0; topNdx < DE_LENGTH_OF_ARRAY(topTestTypes); ++topNdx)
2983 			{
2984 				de::MovePtr<tcu::TestCaseGroup> topGroup(new tcu::TestCaseGroup(group->getTestContext(), topTestTypes[topNdx].name, ""));
2985 
2986 				for (int paddingTypeIdx = 0; paddingTypeIdx < DE_LENGTH_OF_ARRAY(paddingType); ++paddingTypeIdx)
2987 				{
2988 					de::MovePtr<tcu::TestCaseGroup> paddingGroup(new tcu::TestCaseGroup(group->getTestContext(), paddingType[paddingTypeIdx].name, ""));
2989 
2990 					for (size_t optimizationNdx = 0; optimizationNdx < DE_LENGTH_OF_ARRAY(optimizationTypes); ++optimizationNdx)
2991 					{
2992 						for (size_t updateNdx = 0; updateNdx < DE_LENGTH_OF_ARRAY(updateTypes); ++updateNdx)
2993 						{
2994 							for (size_t compactionNdx = 0; compactionNdx < DE_LENGTH_OF_ARRAY(compactionTypes); ++compactionNdx)
2995 							{
2996 								for (size_t lowMemoryNdx = 0; lowMemoryNdx < DE_LENGTH_OF_ARRAY(lowMemoryTypes); ++lowMemoryNdx)
2997 								{
2998 									for (int createGenericIdx = 0; createGenericIdx < DE_LENGTH_OF_ARRAY(createGenericParams); ++createGenericIdx)
2999 									{
3000 										std::string testName =
3001 											std::string(optimizationTypes[optimizationNdx].name) + "_" +
3002 											std::string(updateTypes[updateNdx].name) + "_" +
3003 											std::string(compactionTypes[compactionNdx].name) + "_" +
3004 											std::string(lowMemoryTypes[lowMemoryNdx].name) +
3005 											std::string(createGenericParams[createGenericIdx].suffix);
3006 
3007 										TestParams testParams
3008 										{
3009 											buildTypes[buildTypeNdx].buildType,
3010 											VK_FORMAT_R32G32B32_SFLOAT,
3011 											paddingType[paddingTypeIdx].padVertices,
3012 											VK_INDEX_TYPE_NONE_KHR,
3013 											bottomTestTypes[bottomNdx].testType,
3014 											InstanceCullFlags::NONE,
3015 											bottomTestTypes[bottomNdx].usesAOP,
3016 											createGenericParams[createGenericIdx].bottomGeneric,
3017 											topTestTypes[topNdx].testType,
3018 											topTestTypes[topNdx].usesAOP,
3019 											createGenericParams[createGenericIdx].topGeneric,
3020 											optimizationTypes[optimizationNdx].flags | updateTypes[updateNdx].flags | compactionTypes[compactionNdx].flags | lowMemoryTypes[lowMemoryNdx].flags,
3021 											OT_NONE,
3022 											OP_NONE,
3023 											RTAS_DEFAULT_SIZE,
3024 											RTAS_DEFAULT_SIZE,
3025 											de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()),
3026 											0u,
3027 											EmptyAccelerationStructureCase::NOT_EMPTY,
3028 											InstanceCustomIndexCase::NONE,
3029 										};
3030 										paddingGroup->addChild(new RayTracingASBasicTestCase(group->getTestContext(), testName.c_str(), "", testParams));
3031 									}
3032 								}
3033 							}
3034 						}
3035 					}
3036 					topGroup->addChild(paddingGroup.release());
3037 				}
3038 				bottomGroup->addChild(topGroup.release());
3039 			}
3040 			buildGroup->addChild(bottomGroup.release());
3041 		}
3042 		group->addChild(buildGroup.release());
3043 	}
3044 }
3045 
addVertexIndexFormatsTests(tcu::TestCaseGroup * group)3046 void addVertexIndexFormatsTests(tcu::TestCaseGroup* group)
3047 {
3048 	struct
3049 	{
3050 		vk::VkAccelerationStructureBuildTypeKHR				buildType;
3051 		const char*											name;
3052 	} buildTypes[] =
3053 	{
3054 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
3055 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
3056 	};
3057 
3058 	const VkFormat vertexFormats[] =
3059 	{
3060 		// Mandatory formats.
3061 		VK_FORMAT_R32G32_SFLOAT,
3062 		VK_FORMAT_R32G32B32_SFLOAT,
3063 		VK_FORMAT_R16G16_SFLOAT,
3064 		VK_FORMAT_R16G16B16A16_SFLOAT,
3065 		VK_FORMAT_R16G16_SNORM,
3066 		VK_FORMAT_R16G16B16A16_SNORM,
3067 
3068 		// Additional formats.
3069 		VK_FORMAT_R8G8_SNORM,
3070 		VK_FORMAT_R8G8B8_SNORM,
3071 		VK_FORMAT_R8G8B8A8_SNORM,
3072 		VK_FORMAT_R16G16B16_SNORM,
3073 		VK_FORMAT_R16G16B16_SFLOAT,
3074 		VK_FORMAT_R32G32B32A32_SFLOAT,
3075 		VK_FORMAT_R64G64_SFLOAT,
3076 		VK_FORMAT_R64G64B64_SFLOAT,
3077 		VK_FORMAT_R64G64B64A64_SFLOAT,
3078 	};
3079 
3080 	struct
3081 	{
3082 		VkIndexType								indexType;
3083 		const char*								name;
3084 	} indexFormats[] =
3085 	{
3086 		{ VK_INDEX_TYPE_NONE_KHR ,				"index_none"		},
3087 		{ VK_INDEX_TYPE_UINT16 ,				"index_uint16"	},
3088 		{ VK_INDEX_TYPE_UINT32 ,				"index_uint32"	},
3089 	};
3090 
3091 	struct
3092 	{
3093 		bool		padVertices;
3094 		const char*	name;
3095 	} paddingType[] =
3096 	{
3097 		{ false,	"nopadding"	},
3098 		{ true,		"padded"	},
3099 	};
3100 
3101 	for (size_t buildTypeNdx = 0; buildTypeNdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeNdx)
3102 	{
3103 		de::MovePtr<tcu::TestCaseGroup> buildGroup(new tcu::TestCaseGroup(group->getTestContext(), buildTypes[buildTypeNdx].name, ""));
3104 
3105 		for (size_t vertexFormatNdx = 0; vertexFormatNdx < DE_LENGTH_OF_ARRAY(vertexFormats); ++vertexFormatNdx)
3106 		{
3107 			const auto format		= vertexFormats[vertexFormatNdx];
3108 			const auto formatName	= getFormatSimpleName(format);
3109 
3110 			de::MovePtr<tcu::TestCaseGroup> vertexFormatGroup(new tcu::TestCaseGroup(group->getTestContext(), formatName.c_str(), ""));
3111 
3112 			for (int paddingIdx = 0; paddingIdx < DE_LENGTH_OF_ARRAY(paddingType); ++paddingIdx)
3113 			{
3114 				de::MovePtr<tcu::TestCaseGroup> paddingGroup(new tcu::TestCaseGroup(group->getTestContext(), paddingType[paddingIdx].name, ""));
3115 
3116 				for (size_t indexFormatNdx = 0; indexFormatNdx < DE_LENGTH_OF_ARRAY(indexFormats); ++indexFormatNdx)
3117 				{
3118 					TestParams testParams
3119 					{
3120 						buildTypes[buildTypeNdx].buildType,
3121 						format,
3122 						paddingType[paddingIdx].padVertices,
3123 						indexFormats[indexFormatNdx].indexType,
3124 						BTT_TRIANGLES,
3125 						InstanceCullFlags::NONE,
3126 						false,
3127 						false,
3128 						TTT_IDENTICAL_INSTANCES,
3129 						false,
3130 						false,
3131 						VkBuildAccelerationStructureFlagsKHR(0u),
3132 						OT_NONE,
3133 						OP_NONE,
3134 						RTAS_DEFAULT_SIZE,
3135 						RTAS_DEFAULT_SIZE,
3136 						de::SharedPtr<TestConfiguration>(new SingleTriangleConfiguration()),
3137 						0u,
3138 						EmptyAccelerationStructureCase::NOT_EMPTY,
3139 						InstanceCustomIndexCase::NONE,
3140 					};
3141 					paddingGroup->addChild(new RayTracingASBasicTestCase(group->getTestContext(), indexFormats[indexFormatNdx].name, "", testParams));
3142 				}
3143 				vertexFormatGroup->addChild(paddingGroup.release());
3144 			}
3145 			buildGroup->addChild(vertexFormatGroup.release());
3146 		}
3147 		group->addChild(buildGroup.release());
3148 	}
3149 }
3150 
addOperationTestsImpl(tcu::TestCaseGroup * group,const deUint32 workerThreads)3151 void addOperationTestsImpl (tcu::TestCaseGroup* group, const deUint32 workerThreads)
3152 {
3153 	struct
3154 	{
3155 		OperationType										operationType;
3156 		const char*											name;
3157 	} operationTypes[] =
3158 	{
3159 		{ OP_COPY,											"copy"			},
3160 		{ OP_COMPACT,										"compaction"	},
3161 		{ OP_SERIALIZE,										"serialization"	},
3162 	};
3163 
3164 	struct
3165 	{
3166 		vk::VkAccelerationStructureBuildTypeKHR				buildType;
3167 		const char*											name;
3168 	} buildTypes[] =
3169 	{
3170 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
3171 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
3172 	};
3173 
3174 	struct
3175 	{
3176 		OperationTarget										operationTarget;
3177 		const char*											name;
3178 	} operationTargets[] =
3179 	{
3180 		{ OT_TOP_ACCELERATION,								"top_acceleration_structure"		},
3181 		{ OT_BOTTOM_ACCELERATION,							"bottom_acceleration_structure"	},
3182 	};
3183 
3184 	struct
3185 	{
3186 		BottomTestType										testType;
3187 		const char*											name;
3188 	} bottomTestTypes[] =
3189 	{
3190 		{ BTT_TRIANGLES,									"triangles" },
3191 		{ BTT_AABBS,										"aabbs" },
3192 	};
3193 
3194 	for (size_t operationTypeNdx = 0; operationTypeNdx < DE_LENGTH_OF_ARRAY(operationTypes); ++operationTypeNdx)
3195 	{
3196 		if (workerThreads > 0)
3197 			if (operationTypes[operationTypeNdx].operationType != OP_COPY && operationTypes[operationTypeNdx].operationType != OP_SERIALIZE)
3198 				continue;
3199 
3200 		de::MovePtr<tcu::TestCaseGroup> operationTypeGroup(new tcu::TestCaseGroup(group->getTestContext(), operationTypes[operationTypeNdx].name, ""));
3201 
3202 		for (size_t buildTypeNdx = 0; buildTypeNdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeNdx)
3203 		{
3204 			if (workerThreads > 0 && buildTypes[buildTypeNdx].buildType != VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR)
3205 				continue;
3206 
3207 			de::MovePtr<tcu::TestCaseGroup> buildGroup(new tcu::TestCaseGroup(group->getTestContext(), buildTypes[buildTypeNdx].name, ""));
3208 
3209 			for (size_t operationTargetNdx = 0; operationTargetNdx < DE_LENGTH_OF_ARRAY(operationTargets); ++operationTargetNdx)
3210 			{
3211 				de::MovePtr<tcu::TestCaseGroup> operationTargetGroup(new tcu::TestCaseGroup(group->getTestContext(), operationTargets[operationTargetNdx].name, ""));
3212 
3213 				for (size_t testTypeNdx = 0; testTypeNdx < DE_LENGTH_OF_ARRAY(bottomTestTypes); ++testTypeNdx)
3214 				{
3215 					TopTestType topTest = (operationTargets[operationTargetNdx].operationTarget == OT_TOP_ACCELERATION) ? TTT_DIFFERENT_INSTANCES : TTT_IDENTICAL_INSTANCES;
3216 
3217 					TestParams testParams
3218 					{
3219 						buildTypes[buildTypeNdx].buildType,
3220 						VK_FORMAT_R32G32B32_SFLOAT,
3221 						false,
3222 						VK_INDEX_TYPE_NONE_KHR,
3223 						bottomTestTypes[testTypeNdx].testType,
3224 						InstanceCullFlags::NONE,
3225 						false,
3226 						false,
3227 						topTest,
3228 						false,
3229 						false,
3230 						VkBuildAccelerationStructureFlagsKHR(0u),
3231 						operationTargets[operationTargetNdx].operationTarget,
3232 						operationTypes[operationTypeNdx].operationType,
3233 						RTAS_DEFAULT_SIZE,
3234 						RTAS_DEFAULT_SIZE,
3235 						de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()),
3236 						workerThreads,
3237 						EmptyAccelerationStructureCase::NOT_EMPTY,
3238 						InstanceCustomIndexCase::NONE,
3239 					};
3240 					operationTargetGroup->addChild(new RayTracingASBasicTestCase(group->getTestContext(), bottomTestTypes[testTypeNdx].name, "", testParams));
3241 				}
3242 				buildGroup->addChild(operationTargetGroup.release());
3243 			}
3244 			operationTypeGroup->addChild(buildGroup.release());
3245 		}
3246 		group->addChild(operationTypeGroup.release());
3247 	}
3248 }
3249 
addOperationTests(tcu::TestCaseGroup * group)3250 void addOperationTests (tcu::TestCaseGroup* group)
3251 {
3252 	addOperationTestsImpl(group, 0);
3253 }
3254 
addHostThreadingOperationTests(tcu::TestCaseGroup * group)3255 void addHostThreadingOperationTests (tcu::TestCaseGroup* group)
3256 {
3257 	const deUint32	threads[]	= { 1, 2, 3, 4, 8, std::numeric_limits<deUint32>::max() };
3258 
3259 	for (size_t threadsNdx = 0; threadsNdx < DE_LENGTH_OF_ARRAY(threads); ++threadsNdx)
3260 	{
3261 		const std::string groupName = threads[threadsNdx] != std::numeric_limits<deUint32>::max()
3262 									? de::toString(threads[threadsNdx])
3263 									: "max";
3264 
3265 		de::MovePtr<tcu::TestCaseGroup> threadGroup(new tcu::TestCaseGroup(group->getTestContext(), groupName.c_str(), ""));
3266 
3267 		addOperationTestsImpl(threadGroup.get(), threads[threadsNdx]);
3268 
3269 		group->addChild(threadGroup.release());
3270 	}
3271 }
3272 
addFuncArgTests(tcu::TestCaseGroup * group)3273 void addFuncArgTests (tcu::TestCaseGroup* group)
3274 {
3275 	const struct
3276 	{
3277 		vk::VkAccelerationStructureBuildTypeKHR				buildType;
3278 		const char*											name;
3279 	} buildTypes[] =
3280 	{
3281 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
3282 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
3283 	};
3284 
3285 	auto& ctx = group->getTestContext();
3286 
3287 	for (int buildTypeNdx = 0; buildTypeNdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeNdx)
3288 	{
3289 		TestParams testParams
3290 		{
3291 			buildTypes[buildTypeNdx].buildType,
3292 			VK_FORMAT_R32G32B32_SFLOAT,
3293 			false,
3294 			VK_INDEX_TYPE_NONE_KHR,
3295 			BTT_TRIANGLES,
3296 			InstanceCullFlags::NONE,
3297 			false,
3298 			false,
3299 			TTT_IDENTICAL_INSTANCES,
3300 			false,
3301 			false,
3302 			VkBuildAccelerationStructureFlagsKHR(0u),
3303 			OT_NONE,
3304 			OP_NONE,
3305 			RTAS_DEFAULT_SIZE,
3306 			RTAS_DEFAULT_SIZE,
3307 			de::SharedPtr<TestConfiguration>(new SingleTriangleConfiguration()),
3308 			0u,
3309 			EmptyAccelerationStructureCase::NOT_EMPTY,
3310 			InstanceCustomIndexCase::NONE,
3311 		};
3312 
3313 		group->addChild(new RayTracingASFuncArgTestCase(ctx, buildTypes[buildTypeNdx].name, "", testParams));
3314 	}
3315 }
3316 
addInstanceTriangleCullingTests(tcu::TestCaseGroup * group)3317 void addInstanceTriangleCullingTests (tcu::TestCaseGroup* group)
3318 {
3319 	const struct
3320 	{
3321 		InstanceCullFlags	cullFlags;
3322 		std::string			name;
3323 	} cullFlags[] =
3324 	{
3325 		{ InstanceCullFlags::NONE,				"noflags"		},
3326 		{ InstanceCullFlags::COUNTERCLOCKWISE,	"ccw"			},
3327 		{ InstanceCullFlags::CULL_DISABLE,		"nocull"		},
3328 		{ InstanceCullFlags::ALL,				"ccw_nocull"	},
3329 	};
3330 
3331 	const struct
3332 	{
3333 		TopTestType	topType;
3334 		std::string	name;
3335 	} topType[] =
3336 	{
3337 		{ TTT_DIFFERENT_INSTANCES, "transformed"	},	// Each instance has its own transformation matrix.
3338 		{ TTT_IDENTICAL_INSTANCES, "notransform"	},	// "Identical" instances, different geometries.
3339 	};
3340 
3341 	const struct
3342 	{
3343 		vk::VkAccelerationStructureBuildTypeKHR	buildType;
3344 		std::string								name;
3345 	} buildTypes[] =
3346 	{
3347 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
3348 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
3349 	};
3350 
3351 	const struct
3352 	{
3353 		VkIndexType	indexType;
3354 		std::string	name;
3355 	} indexFormats[] =
3356 	{
3357 		{ VK_INDEX_TYPE_NONE_KHR ,	"index_none"	},
3358 		{ VK_INDEX_TYPE_UINT16 ,	"index_uint16"	},
3359 		{ VK_INDEX_TYPE_UINT32 ,	"index_uint32"	},
3360 	};
3361 
3362 	auto& ctx = group->getTestContext();
3363 
3364 	for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
3365 	{
3366 		de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str(), ""));
3367 
3368 		for (int indexFormatIdx = 0; indexFormatIdx < DE_LENGTH_OF_ARRAY(indexFormats); ++indexFormatIdx)
3369 		{
3370 			de::MovePtr<tcu::TestCaseGroup> indexTypeGroup(new tcu::TestCaseGroup(ctx, indexFormats[indexFormatIdx].name.c_str(), ""));
3371 
3372 			for (int topTypeIdx = 0; topTypeIdx < DE_LENGTH_OF_ARRAY(topType); ++topTypeIdx)
3373 			{
3374 				for (int cullFlagsIdx = 0; cullFlagsIdx < DE_LENGTH_OF_ARRAY(cullFlags); ++cullFlagsIdx)
3375 				{
3376 					const std::string testName = topType[topTypeIdx].name + "_" + cullFlags[cullFlagsIdx].name;
3377 
3378 					TestParams testParams
3379 					{
3380 						buildTypes[buildTypeIdx].buildType,
3381 						VK_FORMAT_R32G32B32_SFLOAT,
3382 						false,
3383 						indexFormats[indexFormatIdx].indexType,
3384 						BTT_TRIANGLES,
3385 						cullFlags[cullFlagsIdx].cullFlags,
3386 						false,
3387 						false,
3388 						topType[topTypeIdx].topType,
3389 						false,
3390 						false,
3391 						VkBuildAccelerationStructureFlagsKHR(0u),
3392 						OT_NONE,
3393 						OP_NONE,
3394 						RTAS_DEFAULT_SIZE,
3395 						RTAS_DEFAULT_SIZE,
3396 						de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()),
3397 						0u,
3398 						EmptyAccelerationStructureCase::NOT_EMPTY,
3399 						InstanceCustomIndexCase::NONE,
3400 					};
3401 					indexTypeGroup->addChild(new RayTracingASBasicTestCase(ctx, testName.c_str(), "", testParams));
3402 				}
3403 			}
3404 			buildTypeGroup->addChild(indexTypeGroup.release());
3405 		}
3406 		group->addChild(buildTypeGroup.release());
3407 	}
3408 }
3409 
addDynamicIndexingTests(tcu::TestCaseGroup * group)3410 void addDynamicIndexingTests (tcu::TestCaseGroup* group)
3411 {
3412 	auto& ctx = group->getTestContext();
3413 	group->addChild(new RayTracingASDynamicIndexingTestCase(ctx, "dynamic_indexing"));
3414 }
3415 
addEmptyAccelerationStructureTests(tcu::TestCaseGroup * group)3416 void addEmptyAccelerationStructureTests (tcu::TestCaseGroup* group)
3417 {
3418 	const struct
3419 	{
3420 		vk::VkAccelerationStructureBuildTypeKHR				buildType;
3421 		std::string											name;
3422 	} buildTypes[] =
3423 	{
3424 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
3425 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
3426 	};
3427 
3428 	const struct
3429 	{
3430 		VkIndexType								indexType;
3431 		std::string								name;
3432 	} indexFormats[] =
3433 	{
3434 		{ VK_INDEX_TYPE_NONE_KHR,				"index_none"	},
3435 		{ VK_INDEX_TYPE_UINT16,					"index_uint16"	},
3436 		{ VK_INDEX_TYPE_UINT32,					"index_uint32"	},
3437 	};
3438 
3439 	const struct
3440 	{
3441 		EmptyAccelerationStructureCase	emptyASCase;
3442 		std::string						name;
3443 	} emptyCases[] =
3444 	{
3445 		{ EmptyAccelerationStructureCase::INACTIVE_TRIANGLES,	"inactive_triangles"	},
3446 		{ EmptyAccelerationStructureCase::INACTIVE_INSTANCES,	"inactive_instances"	},
3447 		{ EmptyAccelerationStructureCase::NO_GEOMETRIES_BOTTOM,	"no_geometries_bottom"	},
3448 		{ EmptyAccelerationStructureCase::NO_PRIMITIVES_TOP,	"no_primitives_top"		},
3449 		{ EmptyAccelerationStructureCase::NO_PRIMITIVES_BOTTOM,	"no_primitives_bottom"	},
3450 	};
3451 
3452 	auto& ctx = group->getTestContext();
3453 
3454 	for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
3455 	{
3456 		de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str(), ""));
3457 
3458 		for (int indexFormatIdx = 0; indexFormatIdx < DE_LENGTH_OF_ARRAY(indexFormats); ++indexFormatIdx)
3459 		{
3460 			de::MovePtr<tcu::TestCaseGroup> indexTypeGroup(new tcu::TestCaseGroup(ctx, indexFormats[indexFormatIdx].name.c_str(), ""));
3461 
3462 			for (int emptyCaseIdx = 0; emptyCaseIdx < DE_LENGTH_OF_ARRAY(emptyCases); ++emptyCaseIdx)
3463 			{
3464 				TestParams testParams
3465 				{
3466 					buildTypes[buildTypeIdx].buildType,
3467 					VK_FORMAT_R32G32B32_SFLOAT,
3468 					false,
3469 					indexFormats[indexFormatIdx].indexType,
3470 					BTT_TRIANGLES,
3471 					InstanceCullFlags::NONE,
3472 					false,
3473 					false,
3474 					TTT_IDENTICAL_INSTANCES,
3475 					false,
3476 					false,
3477 					VkBuildAccelerationStructureFlagsKHR(0u),
3478 					OT_NONE,
3479 					OP_NONE,
3480 					RTAS_DEFAULT_SIZE,
3481 					RTAS_DEFAULT_SIZE,
3482 					de::SharedPtr<TestConfiguration>(new SingleTriangleConfiguration()),
3483 					0u,
3484 					emptyCases[emptyCaseIdx].emptyASCase,
3485 					InstanceCustomIndexCase::NONE,
3486 				};
3487 				indexTypeGroup->addChild(new RayTracingASBasicTestCase(ctx, emptyCases[emptyCaseIdx].name.c_str(), "", testParams));
3488 			}
3489 			buildTypeGroup->addChild(indexTypeGroup.release());
3490 		}
3491 		group->addChild(buildTypeGroup.release());
3492 	}
3493 }
3494 
addInstanceIndexTests(tcu::TestCaseGroup * group)3495 void addInstanceIndexTests (tcu::TestCaseGroup* group)
3496 {
3497 	const struct
3498 	{
3499 		vk::VkAccelerationStructureBuildTypeKHR				buildType;
3500 		std::string											name;
3501 	} buildTypes[] =
3502 	{
3503 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
3504 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
3505 	};
3506 
3507 	const struct
3508 	{
3509 		InstanceCustomIndexCase						customIndexCase;
3510 		std::string									name;
3511 	} customIndexCases[] =
3512 	{
3513 		{ InstanceCustomIndexCase::NONE,			"no_instance_index"	},
3514 		{ InstanceCustomIndexCase::ANY_HIT,			"ahit"				},
3515 		{ InstanceCustomIndexCase::CLOSEST_HIT,		"chit"				},
3516 		{ InstanceCustomIndexCase::INTERSECTION,	"isec"				},
3517 	};
3518 
3519 	auto& ctx = group->getTestContext();
3520 
3521 	for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
3522 	{
3523 		de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str(), ""));
3524 
3525 		for (int customIndexCaseIdx = 0; customIndexCaseIdx < DE_LENGTH_OF_ARRAY(customIndexCases); ++customIndexCaseIdx)
3526 		{
3527 			const auto&	idxCase				= customIndexCases[customIndexCaseIdx].customIndexCase;
3528 			const auto	bottomGeometryType	= ((idxCase == InstanceCustomIndexCase::INTERSECTION) ? BTT_AABBS : BTT_TRIANGLES);
3529 
3530 			TestParams testParams
3531 			{
3532 				buildTypes[buildTypeIdx].buildType,
3533 				VK_FORMAT_R32G32B32_SFLOAT,
3534 				false,
3535 				VK_INDEX_TYPE_NONE_KHR,
3536 				bottomGeometryType,
3537 				InstanceCullFlags::NONE,
3538 				false,
3539 				false,
3540 				TTT_IDENTICAL_INSTANCES,
3541 				false,
3542 				false,
3543 				VkBuildAccelerationStructureFlagsKHR(0u),
3544 				OT_NONE,
3545 				OP_NONE,
3546 				RTAS_DEFAULT_SIZE,
3547 				RTAS_DEFAULT_SIZE,
3548 				de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()),
3549 				0u,
3550 				EmptyAccelerationStructureCase::NOT_EMPTY,
3551 				customIndexCases[customIndexCaseIdx].customIndexCase,
3552 			};
3553 			buildTypeGroup->addChild(new RayTracingASBasicTestCase(ctx, customIndexCases[customIndexCaseIdx].name.c_str(), "", testParams));
3554 		}
3555 		group->addChild(buildTypeGroup.release());
3556 	}
3557 }
3558 
addGetDeviceAccelerationStructureCompabilityTests(tcu::TestCaseGroup * group)3559 void addGetDeviceAccelerationStructureCompabilityTests (tcu::TestCaseGroup* group)
3560 {
3561 	struct
3562 	{
3563 		vk::VkAccelerationStructureBuildTypeKHR				buildType;
3564 		std::string											name;
3565 	}
3566 	const buildTypes[] =
3567 	{
3568 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
3569 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
3570 	};
3571 
3572 	struct
3573 	{
3574 		OperationTarget	target;
3575 		std::string		name;
3576 	}
3577 	const targets[] =
3578 	{
3579 		{ OT_TOP_ACCELERATION,		"top" },
3580 		{ OT_BOTTOM_ACCELERATION,	"bottom" },
3581 	};
3582 
3583 	auto& ctx = group->getTestContext();
3584 
3585 	for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
3586 	{
3587 		de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str(), ""));
3588 
3589 		for (int targetIdx = 0; targetIdx < DE_LENGTH_OF_ARRAY(targets); ++targetIdx)
3590 		{
3591 			TestParams testParams
3592 			{
3593 				buildTypes[buildTypeIdx].buildType,									// buildType		- are we making AS on CPU or GPU
3594 				VK_FORMAT_R32G32B32_SFLOAT,											// vertexFormat
3595 				false,																// padVertices
3596 				VK_INDEX_TYPE_NONE_KHR,												// indexType
3597 				BTT_TRIANGLES,														// bottomTestType	- what kind of geometry is stored in bottom AS
3598 				InstanceCullFlags::NONE,											// cullFlags		- Flags for instances, if needed.
3599 				false,																// bottomUsesAOP	- does bottom AS use arrays, or arrays of pointers
3600 				false,																// bottomGeneric	- Bottom created as generic AS type.
3601 				TTT_IDENTICAL_INSTANCES,											// topTestType		- If instances are identical then bottom geometries must have different vertices/aabbs
3602 				false,																// topUsesAOP		- does top AS use arrays, or arrays of pointers
3603 				false,																// topGeneric		- Top created as generic AS type.
3604 				VkBuildAccelerationStructureFlagsKHR(0u),							// buildFlags
3605 				targets[targetIdx].target,											// operationTarget
3606 				OP_NONE,															// operationType
3607 				RTAS_DEFAULT_SIZE,													// width
3608 				RTAS_DEFAULT_SIZE,													// height
3609 				de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()),	// testConfiguration
3610 				0u,																	// workerThreadsCount
3611 				EmptyAccelerationStructureCase::NOT_EMPTY,							// emptyASCase
3612 				InstanceCustomIndexCase::NONE,										// instanceCustomIndexCase
3613 			};
3614 			buildTypeGroup->addChild(new RayTracingDeviceASCompabilityKHRTestCase(ctx, targets[targetIdx].name.c_str(), de::SharedPtr<TestParams>(new TestParams(testParams))));
3615 		}
3616 		group->addChild(buildTypeGroup.release());
3617 	}
3618 }
3619 
addUpdateHeaderBottomAddressTests(tcu::TestCaseGroup * group)3620 void addUpdateHeaderBottomAddressTests (tcu::TestCaseGroup* group)
3621 {
3622 	struct
3623 	{
3624 		vk::VkAccelerationStructureBuildTypeKHR		buildType;
3625 		std::string									name;
3626 	}
3627 	const buildTypes[] =
3628 	{
3629 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR,	"cpu_built"	},
3630 		{ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,	"gpu_built"	},
3631 	};
3632 
3633 	struct
3634 	{
3635 		TopTestType	type;
3636 		std::string	name;
3637 	}
3638 	const instTypes[] =
3639 	{
3640 		{ TTT_IDENTICAL_INSTANCES,	"the_same_instances"		},
3641 		{ TTT_DIFFERENT_INSTANCES,	"different_instances"		},
3642 		{ TTT_MIX_INSTANCES,		"mix_same_diff_instances"	},
3643 	};
3644 
3645 	auto& ctx = group->getTestContext();
3646 
3647 	for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
3648 	{
3649 		de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str(), ""));
3650 
3651 		for (int instTypeIdx = 0; instTypeIdx < DE_LENGTH_OF_ARRAY(instTypes); ++instTypeIdx)
3652 		{
3653 			TestParams testParams
3654 			{
3655 				buildTypes[buildTypeIdx].buildType,									// buildType
3656 				VK_FORMAT_R32G32B32_SFLOAT,											// vertexFormat
3657 				false,																// padVertices
3658 				VK_INDEX_TYPE_NONE_KHR,												// indexType
3659 				BTT_TRIANGLES,														// bottomTestType
3660 				InstanceCullFlags::NONE,											// cullFlags
3661 				false,																// bottomUsesAOP
3662 				false,																// bottomGeneric
3663 				instTypes[instTypeIdx].type,										// topTestType
3664 				false,																// topUsesAOP
3665 				false,																// topGeneric
3666 				VkBuildAccelerationStructureFlagsKHR(0u),							// buildFlags
3667 				OT_TOP_ACCELERATION,												// operationTarget
3668 				OP_NONE,															// operationType
3669 				RTAS_DEFAULT_SIZE,													// width
3670 				RTAS_DEFAULT_SIZE,													// height
3671 				de::SharedPtr<TestConfiguration>(DE_NULL),							// testConfiguration
3672 				0u,																	// workerThreadsCount
3673 				EmptyAccelerationStructureCase::NOT_EMPTY,							// emptyASCase
3674 				InstanceCustomIndexCase::NONE,										// instanceCustomIndexCase
3675 			};
3676 			buildTypeGroup->addChild(new RayTracingHeaderBottomAddressTestCase(ctx, instTypes[instTypeIdx].name.c_str(), de::SharedPtr<TestParams>(new TestParams(testParams))));
3677 		}
3678 		group->addChild(buildTypeGroup.release());
3679 	}
3680 }
3681 
createAccelerationStructuresTests(tcu::TestContext & testCtx)3682 tcu::TestCaseGroup*	createAccelerationStructuresTests(tcu::TestContext& testCtx)
3683 {
3684 	de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "acceleration_structures", "Acceleration structure tests"));
3685 
3686 	addTestGroup(group.get(), "flags", "Test building AS with different build types, build flags and geometries/instances using arrays or arrays of pointers", addBasicBuildingTests);
3687 	addTestGroup(group.get(), "format", "Test building AS with different vertex and index formats", addVertexIndexFormatsTests);
3688 	addTestGroup(group.get(), "operations", "Test copying, compaction and serialization of AS", addOperationTests);
3689 	addTestGroup(group.get(), "host_threading", "Test host threading operations", addHostThreadingOperationTests);
3690 	addTestGroup(group.get(), "function_argument", "Test using AS as function argument using both pointers and bare values", addFuncArgTests);
3691 	addTestGroup(group.get(), "instance_triangle_culling", "Test building AS with counterclockwise triangles and/or disabling face culling", addInstanceTriangleCullingTests);
3692 	addTestGroup(group.get(), "dynamic_indexing", "Exercise dynamic indexing of acceleration structures", addDynamicIndexingTests);
3693 	addTestGroup(group.get(), "empty", "Test building empty acceleration structures using different methods", addEmptyAccelerationStructureTests);
3694 	addTestGroup(group.get(), "instance_index", "Test using different values for the instance index and checking them in shaders", addInstanceIndexTests);
3695 	addTestGroup(group.get(), "device_compability_khr", "", addGetDeviceAccelerationStructureCompabilityTests);
3696 	addTestGroup(group.get(), "header_bottom_address", "", addUpdateHeaderBottomAddressTests);
3697 
3698 	return group.release();
3699 }
3700 
3701 }	// RayTracing
3702 
3703 }	// vkt
3704 
3705