• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*-------------------------------------------------------------------------
2  * Vulkan CTS Framework
3  * --------------------
4  *
5  * Copyright (c) 2020 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Utilities for creating commonly used Vulkan objects
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vkRayTracingUtil.hpp"
25 
26 #include "vkRefUtil.hpp"
27 #include "vkQueryUtil.hpp"
28 #include "vkObjUtil.hpp"
29 #include "vkBarrierUtil.hpp"
30 #include "vkCmdUtil.hpp"
31 
32 #include "deStringUtil.hpp"
33 #include "deSTLUtil.hpp"
34 
35 #include <vector>
36 #include <string>
37 #include <thread>
38 #include <limits>
39 #include <type_traits>
40 #include <map>
41 
42 namespace vk
43 {
44 
45 #ifndef CTS_USES_VULKANSC
46 
// Parameter bundle handed to each worker thread that joins a deferred
// operation. One instance per thread; 'result' is written back by
// finishDeferredOperationThreaded() with that thread's outcome.
struct DeferredThreadParams
{
	const DeviceInterface&	vk;
	VkDevice				device;
	VkDeferredOperationKHR	deferredOperation;
	VkResult				result;		// filled in by the worker thread
};
54 
getFormatSimpleName(vk::VkFormat format)55 std::string getFormatSimpleName (vk::VkFormat format)
56 {
57 	constexpr size_t kPrefixLen = 10; // strlen("VK_FORMAT_")
58 	return de::toLower(de::toString(format).substr(kPrefixLen));
59 }
60 
pointInTriangle2D(const tcu::Vec3 & p,const tcu::Vec3 & p0,const tcu::Vec3 & p1,const tcu::Vec3 & p2)61 bool pointInTriangle2D(const tcu::Vec3& p, const tcu::Vec3& p0, const tcu::Vec3& p1, const tcu::Vec3& p2)
62 {
63 	float s = p0.y() * p2.x() - p0.x() * p2.y() + (p2.y() - p0.y()) * p.x() + (p0.x() - p2.x()) * p.y();
64 	float t = p0.x() * p1.y() - p0.y() * p1.x() + (p0.y() - p1.y()) * p.x() + (p1.x() - p0.x()) * p.y();
65 
66 	if ((s < 0) != (t < 0))
67 		return false;
68 
69 	float a = -p1.y() * p2.x() + p0.y() * (p2.x() - p1.x()) + p0.x() * (p1.y() - p2.y()) + p1.x() * p2.y();
70 
71 	return a < 0 ?
72 		(s <= 0 && s + t >= a) :
73 		(s >= 0 && s + t <= a);
74 }
75 
76 // Returns true if VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR needs to be supported for the given format.
isMandatoryAccelerationStructureVertexBufferFormat(vk::VkFormat format)77 static bool isMandatoryAccelerationStructureVertexBufferFormat (vk::VkFormat format)
78 {
79 	bool mandatory = false;
80 
81 	switch (format)
82 	{
83     case VK_FORMAT_R32G32_SFLOAT:
84     case VK_FORMAT_R32G32B32_SFLOAT:
85     case VK_FORMAT_R16G16_SFLOAT:
86     case VK_FORMAT_R16G16B16A16_SFLOAT:
87     case VK_FORMAT_R16G16_SNORM:
88     case VK_FORMAT_R16G16B16A16_SNORM:
89 		mandatory = true;
90 		break;
91 	default:
92 		break;
93 	}
94 
95 	return mandatory;
96 }
97 
checkAccelerationStructureVertexBufferFormat(const vk::InstanceInterface & vki,vk::VkPhysicalDevice physicalDevice,vk::VkFormat format)98 void checkAccelerationStructureVertexBufferFormat (const vk::InstanceInterface &vki, vk::VkPhysicalDevice physicalDevice, vk::VkFormat format)
99 {
100 	const vk::VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(vki, physicalDevice, format);
101 
102 	if ((formatProperties.bufferFeatures & vk::VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR) == 0u)
103 	{
104 		const std::string errorMsg = "Format not supported for acceleration structure vertex buffers";
105 		if (isMandatoryAccelerationStructureVertexBufferFormat(format))
106 			TCU_FAIL(errorMsg);
107 		TCU_THROW(NotSupportedError, errorMsg);
108 	}
109 }
110 
// Returns the GLSL source of the ray generation shader shared by many ray
// tracing tests: one ray per launch ID, shot from the launch-normalized
// position at z=0 towards -z with tmax 9.0.
std::string getCommonRayGenerationShader (void)
{
	std::string src;
	src += "#version 460 core\n";
	src += "#extension GL_EXT_ray_tracing : require\n";
	src += "layout(location = 0) rayPayloadEXT vec3 hitValue;\n";
	src += "layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n";
	src += "\n";
	src += "void main()\n";
	src += "{\n";
	src += "  uint  rayFlags = 0;\n";
	src += "  uint  cullMask = 0xFF;\n";
	src += "  float tmin     = 0.0;\n";
	src += "  float tmax     = 9.0;\n";
	src += "  vec3  origin   = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n";
	src += "  vec3  direct   = vec3(0.0, 0.0, -1.0);\n";
	src += "  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, 0);\n";
	src += "}\n";
	return src;
}
130 
// Records the geometry configuration; geometry flags start cleared and no
// opacity micromap is attached. AABB geometries are required to use
// VK_FORMAT_R32G32B32_SFLOAT vertices (enforced by the assertion below).
RaytracedGeometryBase::RaytracedGeometryBase (VkGeometryTypeKHR geometryType, VkFormat vertexFormat, VkIndexType indexType)
	: m_geometryType	(geometryType)
	, m_vertexFormat	(vertexFormat)
	, m_indexType		(indexType)
	, m_geometryFlags	((VkGeometryFlagsKHR)0u)
	, m_hasOpacityMicromap (false)
{
	if (m_geometryType == VK_GEOMETRY_TYPE_AABBS_KHR)
		DE_ASSERT(m_vertexFormat == VK_FORMAT_R32G32B32_SFLOAT);
}
141 
// Out-of-line virtual-style destructor anchor; no resources to release here.
RaytracedGeometryBase::~RaytracedGeometryBase ()
{
}
145 
// Construction parameters forwarded to RaytracedGeometry specializations by
// buildRaytracedGeometry()/makeRaytracedGeometry().
struct GeometryBuilderParams
{
	VkGeometryTypeKHR	geometryType;	// triangles or AABBs
	bool				usePadding;		// request padded vertex storage (passed as 1u/0u)
};
151 
// Instantiates a RaytracedGeometry with vertex type V and index type I.
// Returns a raw owning pointer; callers (makeRaytracedGeometry) wrap it in
// a de::SharedPtr immediately.
template <typename V, typename I>
RaytracedGeometryBase* buildRaytracedGeometry (const GeometryBuilderParams& params)
{
	return new RaytracedGeometry<V, I>(params.geometryType, (params.usePadding ? 1u : 0u));
}
157 
makeRaytracedGeometry(VkGeometryTypeKHR geometryType,VkFormat vertexFormat,VkIndexType indexType,bool padVertices)158 de::SharedPtr<RaytracedGeometryBase> makeRaytracedGeometry (VkGeometryTypeKHR geometryType, VkFormat vertexFormat, VkIndexType indexType, bool padVertices)
159 {
160 	const GeometryBuilderParams builderParams { geometryType, padVertices };
161 
162 	switch (vertexFormat)
163 	{
164 		case VK_FORMAT_R32G32_SFLOAT:
165 			switch (indexType)
166 			{
167 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, deUint16>(builderParams));
168 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, deUint32>(builderParams));
169 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, EmptyIndex>(builderParams));
170 				default:						TCU_THROW(InternalError, "Wrong index type");
171 			}
172 		case VK_FORMAT_R32G32B32_SFLOAT:
173 			switch (indexType)
174 			{
175 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, deUint16>(builderParams));
176 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, deUint32>(builderParams));
177 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, EmptyIndex>(builderParams));
178 				default:						TCU_THROW(InternalError, "Wrong index type");
179 			}
180 		case VK_FORMAT_R32G32B32A32_SFLOAT:
181 			switch (indexType)
182 			{
183 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, deUint16>(builderParams));
184 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, deUint32>(builderParams));
185 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, EmptyIndex>(builderParams));
186 				default:						TCU_THROW(InternalError, "Wrong index type");
187 			}
188 		case VK_FORMAT_R16G16_SFLOAT:
189 			switch (indexType)
190 			{
191 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, deUint16>(builderParams));
192 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, deUint32>(builderParams));
193 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, EmptyIndex>(builderParams));
194 				default:						TCU_THROW(InternalError, "Wrong index type");
195 			}
196 		case VK_FORMAT_R16G16B16_SFLOAT:
197 			switch (indexType)
198 			{
199 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, deUint16>(builderParams));
200 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, deUint32>(builderParams));
201 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, EmptyIndex>(builderParams));
202 				default:						TCU_THROW(InternalError, "Wrong index type");
203 			}
204 		case VK_FORMAT_R16G16B16A16_SFLOAT:
205 			switch (indexType)
206 			{
207 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, deUint16>(builderParams));
208 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, deUint32>(builderParams));
209 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, EmptyIndex>(builderParams));
210 				default:						TCU_THROW(InternalError, "Wrong index type");
211 			}
212 		case VK_FORMAT_R16G16_SNORM:
213 			switch (indexType)
214 			{
215 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, deUint16>(builderParams));
216 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, deUint32>(builderParams));
217 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, EmptyIndex>(builderParams));
218 				default:						TCU_THROW(InternalError, "Wrong index type");
219 			}
220 		case VK_FORMAT_R16G16B16_SNORM:
221 			switch (indexType)
222 			{
223 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, deUint16>(builderParams));
224 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, deUint32>(builderParams));
225 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, EmptyIndex>(builderParams));
226 				default:						TCU_THROW(InternalError, "Wrong index type");
227 			}
228 		case VK_FORMAT_R16G16B16A16_SNORM:
229 			switch (indexType)
230 			{
231 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, deUint16>(builderParams));
232 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, deUint32>(builderParams));
233 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, EmptyIndex>(builderParams));
234 				default:						TCU_THROW(InternalError, "Wrong index type");
235 			}
236 		case VK_FORMAT_R64G64_SFLOAT:
237 			switch (indexType)
238 			{
239 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, deUint16>(builderParams));
240 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, deUint32>(builderParams));
241 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, EmptyIndex>(builderParams));
242 				default:						TCU_THROW(InternalError, "Wrong index type");
243 			}
244 		case VK_FORMAT_R64G64B64_SFLOAT:
245 			switch (indexType)
246 			{
247 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, deUint16>(builderParams));
248 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, deUint32>(builderParams));
249 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, EmptyIndex>(builderParams));
250 				default:						TCU_THROW(InternalError, "Wrong index type");
251 			}
252 		case VK_FORMAT_R64G64B64A64_SFLOAT:
253 			switch (indexType)
254 			{
255 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, deUint16>(builderParams));
256 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, deUint32>(builderParams));
257 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, EmptyIndex>(builderParams));
258 				default:						TCU_THROW(InternalError, "Wrong index type");
259 			}
260 		case VK_FORMAT_R8G8_SNORM:
261 			switch (indexType)
262 			{
263 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, deUint16>(builderParams));
264 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, deUint32>(builderParams));
265 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, EmptyIndex>(builderParams));
266 				default:						TCU_THROW(InternalError, "Wrong index type");
267 			}
268 		case VK_FORMAT_R8G8B8_SNORM:
269 			switch (indexType)
270 			{
271 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, deUint16>(builderParams));
272 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, deUint32>(builderParams));
273 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, EmptyIndex>(builderParams));
274 				default:						TCU_THROW(InternalError, "Wrong index type");
275 			}
276 		case VK_FORMAT_R8G8B8A8_SNORM:
277 			switch (indexType)
278 			{
279 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, deUint16>(builderParams));
280 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, deUint32>(builderParams));
281 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, EmptyIndex>(builderParams));
282 				default:						TCU_THROW(InternalError, "Wrong index type");
283 			}
284 		default:
285 			TCU_THROW(InternalError, "Wrong vertex format");
286 	}
287 
288 }
289 
getBufferDeviceAddress(const DeviceInterface & vk,const VkDevice device,const VkBuffer buffer,VkDeviceSize offset)290 VkDeviceAddress getBufferDeviceAddress ( const DeviceInterface&	vk,
291 										 const VkDevice			device,
292 										 const VkBuffer			buffer,
293 										 VkDeviceSize			offset )
294 {
295 
296 	if (buffer == DE_NULL)
297 		return 0;
298 
299 	VkBufferDeviceAddressInfo deviceAddressInfo
300 	{
301 		VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,		// VkStructureType    sType
302 		DE_NULL,											// const void*        pNext
303 		buffer												// VkBuffer           buffer;
304 	};
305 	return vk.getBufferDeviceAddress(device, &deviceAddressInfo) + offset;
306 }
307 
308 
makeQueryPool(const DeviceInterface & vk,const VkDevice device,const VkQueryType queryType,deUint32 queryCount)309 static inline Move<VkQueryPool> makeQueryPool (const DeviceInterface&		vk,
310 											   const VkDevice				device,
311 											   const VkQueryType			queryType,
312 											   deUint32					queryCount)
313 {
314 	const VkQueryPoolCreateInfo				queryPoolCreateInfo =
315 	{
316 		VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,		// sType
317 		DE_NULL,										// pNext
318 		(VkQueryPoolCreateFlags)0,						// flags
319 		queryType,										// queryType
320 		queryCount,										// queryCount
321 		0u,												// pipelineStatistics
322 	};
323 	return createQueryPool(vk, device, &queryPoolCreateInfo);
324 }
325 
// Builds the geometry-data union with its 'triangles' member active.
// The union is zeroed first so the unused alternatives hold no stale bytes.
static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureGeometryDataKHR (const VkAccelerationStructureGeometryTrianglesDataKHR& triangles)
{
	VkAccelerationStructureGeometryDataKHR result;

	deMemset(&result, 0, sizeof(result));

	result.triangles = triangles;

	return result;
}
336 
// Builds the geometry-data union with its 'aabbs' member active.
// The union is zeroed first so the unused alternatives hold no stale bytes.
static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureGeometryDataKHR (const VkAccelerationStructureGeometryAabbsDataKHR& aabbs)
{
	VkAccelerationStructureGeometryDataKHR result;

	deMemset(&result, 0, sizeof(result));

	result.aabbs = aabbs;

	return result;
}
347 
// Builds the geometry-data union with its 'instances' member active.
// The union is zeroed first so the unused alternatives hold no stale bytes.
static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureInstancesDataKHR (const VkAccelerationStructureGeometryInstancesDataKHR& instances)
{
	VkAccelerationStructureGeometryDataKHR result;

	deMemset(&result, 0, sizeof(result));

	result.instances = instances;

	return result;
}
358 
makeVkAccelerationStructureInstanceKHR(const VkTransformMatrixKHR & transform,deUint32 instanceCustomIndex,deUint32 mask,deUint32 instanceShaderBindingTableRecordOffset,VkGeometryInstanceFlagsKHR flags,deUint64 accelerationStructureReference)359 static inline VkAccelerationStructureInstanceKHR makeVkAccelerationStructureInstanceKHR (const VkTransformMatrixKHR&			transform,
360 																						 deUint32								instanceCustomIndex,
361 																						 deUint32								mask,
362 																						 deUint32								instanceShaderBindingTableRecordOffset,
363 																						 VkGeometryInstanceFlagsKHR				flags,
364 																						 deUint64								accelerationStructureReference)
365 {
366 	VkAccelerationStructureInstanceKHR instance		= { transform, 0, 0, 0, 0, accelerationStructureReference };
367 	instance.instanceCustomIndex					= instanceCustomIndex & 0xFFFFFF;
368 	instance.mask									= mask & 0xFF;
369 	instance.instanceShaderBindingTableRecordOffset	= instanceShaderBindingTableRecordOffset & 0xFFFFFF;
370 	instance.flags									= flags & 0xFF;
371 	return instance;
372 }
373 
// Thin pass-through to vkGetRayTracingShaderGroupHandlesKHR; copies the
// handles of 'groupCount' groups starting at 'firstGroup' into pData.
VkResult getRayTracingShaderGroupHandlesKHR (const DeviceInterface&		vk,
											 const VkDevice				device,
											 const VkPipeline			pipeline,
											 const deUint32				firstGroup,
											 const deUint32				groupCount,
											 const deUintptr			dataSize,
											 void*						pData)
{
	return vk.getRayTracingShaderGroupHandlesKHR(device, pipeline, firstGroup, groupCount, dataSize, pData);
}
384 
// Convenience alias that forwards to the KHR entry point above.
VkResult getRayTracingShaderGroupHandles (const DeviceInterface&		vk,
										  const VkDevice				device,
										  const VkPipeline				pipeline,
										  const deUint32				firstGroup,
										  const deUint32				groupCount,
										  const deUintptr				dataSize,
										  void*							pData)
{
	return getRayTracingShaderGroupHandlesKHR(vk, device, pipeline, firstGroup, groupCount, dataSize, pData);
}
395 
// Drives a deferred operation to completion on the calling thread: joins the
// operation repeatedly while the driver reports VK_THREAD_IDLE_KHR, then
// resolves the operation's final result. Returns that result.
VkResult finishDeferredOperation (const DeviceInterface&	vk,
								  VkDevice					device,
								  VkDeferredOperationKHR	deferredOperation)
{
	VkResult result = vk.deferredOperationJoinKHR(device, deferredOperation);

	// VK_THREAD_IDLE_KHR: no work is available for this thread right now but
	// the operation is not complete either - yield and join again.
	while (result == VK_THREAD_IDLE_KHR)
	{
		std::this_thread::yield();
		result = vk.deferredOperationJoinKHR(device, deferredOperation);
	}

	switch( result )
	{
		case VK_SUCCESS:
		{
			// Deferred operation has finished. Query its result
			result = vk.getDeferredOperationResultKHR(device, deferredOperation);

			break;
		}

		case VK_THREAD_DONE_KHR:
		{
			// Deferred operation is being wrapped up by another thread
			// wait for that thread to finish
			do
			{
				std::this_thread::yield();
				result = vk.getDeferredOperationResultKHR(device, deferredOperation);
			} while (result == VK_NOT_READY);

			break;
		}

		default:
		{
			// deferredOperationJoinKHR should only produce the values handled above.
			DE_ASSERT(false);

			break;
		}
	}

	return result;
}
441 
// Thread entry point for the multi-threaded variant of finishDeferredOperation;
// stores the per-thread result in the shared parameter struct.
void finishDeferredOperationThreaded (DeferredThreadParams* deferredThreadParams)
{
	deferredThreadParams->result = finishDeferredOperation(deferredThreadParams->vk, deferredThreadParams->device, deferredThreadParams->deferredOperation);
}
446 
// Completes a deferred operation, optionally with multiple worker threads.
// - operationNotDeferred: the driver reported the command was not actually
//   deferred, so only the operation result is fetched (no joins needed).
// - workerThreadCount == 0: finish on the calling thread.
// - workerThreadCount == UINT32_MAX: use the driver-reported maximum
//   concurrency, clamped to 256.
// At least one worker thread must report VK_SUCCESS or the test fails.
void finishDeferredOperation (const DeviceInterface&	vk,
							  VkDevice					device,
							  VkDeferredOperationKHR	deferredOperation,
							  const deUint32			workerThreadCount,
							  const bool				operationNotDeferred)
{

	if (operationNotDeferred)
	{
		// when the operation deferral returns VK_OPERATION_NOT_DEFERRED_KHR,
		// the deferred operation should act as if no command was deferred
		VK_CHECK(vk.getDeferredOperationResultKHR(device, deferredOperation));


		// there is not need to join any threads to the deferred operation,
		// so below can be skipped.
		return;
	}

	if (workerThreadCount == 0)
	{
		VK_CHECK(finishDeferredOperation(vk, device, deferredOperation));
	}
	else
	{
		const deUint32							maxThreadCountSupported	= deMinu32(256u, vk.getDeferredOperationMaxConcurrencyKHR(device, deferredOperation));
		const deUint32							requestedThreadCount	= workerThreadCount;
		const deUint32							testThreadCount			= requestedThreadCount == std::numeric_limits<deUint32>::max() ? maxThreadCountSupported : requestedThreadCount;

		if (maxThreadCountSupported == 0)
			TCU_FAIL("vkGetDeferredOperationMaxConcurrencyKHR must not return 0");

		const DeferredThreadParams				deferredThreadParams	=
		{
			vk,					//  const DeviceInterface&	vk;
			device,				//  VkDevice				device;
			deferredOperation,	//  VkDeferredOperationKHR	deferredOperation;
			VK_RESULT_MAX_ENUM,	//  VkResult				result;
		};
		// One copy of the params (and thus one result slot) per worker thread.
		std::vector<DeferredThreadParams>		threadParams	(testThreadCount, deferredThreadParams);
		std::vector<de::MovePtr<std::thread> >	threads			(testThreadCount);
		bool									executionResult	= false;

		DE_ASSERT(threads.size() > 0 && threads.size() == testThreadCount);

		// Launch all workers, then join them all before inspecting results.
		for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
			threads[threadNdx] = de::MovePtr<std::thread>(new std::thread(finishDeferredOperationThreaded, &threadParams[threadNdx]));

		for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
			threads[threadNdx]->join();

		for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
			if (threadParams[threadNdx].result == VK_SUCCESS)
				executionResult = true;

		if (!executionResult)
			TCU_FAIL("Neither reported VK_SUCCESS");
	}
}
506 
// Allocates host-visible storage of 'storageSize' bytes for acceleration
// structure (de)serialization. Cached memory is tried first for performance;
// if the implementation rejects that combination the allocation is retried
// without the Cached requirement.
SerialStorage::SerialStorage (const DeviceInterface&									vk,
							  const VkDevice											device,
							  Allocator&												allocator,
							  const VkAccelerationStructureBuildTypeKHR					buildType,
							  const VkDeviceSize										storageSize)
	: m_buildType		(buildType)
	, m_storageSize		(storageSize)
	, m_serialInfo		()
{
	const VkBufferCreateInfo	bufferCreateInfo	= makeBufferCreateInfo(storageSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
	try
	{
		m_buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
	}
	catch (const tcu::NotSupportedError&)
	{
		// retry without Cached flag
		m_buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
	}
}
527 
SerialStorage(const DeviceInterface & vk,const VkDevice device,Allocator & allocator,const VkAccelerationStructureBuildTypeKHR buildType,const SerialInfo & serialInfo)528 SerialStorage::SerialStorage (const DeviceInterface&						vk,
529 							  const VkDevice								device,
530 							  Allocator&									allocator,
531 							  const VkAccelerationStructureBuildTypeKHR		buildType,
532 							  const SerialInfo&								serialInfo)
533 	: m_buildType		(buildType)
534 	, m_storageSize		(serialInfo.sizes()[0])	// raise assertion if serialInfo is empty
535 	, m_serialInfo		(serialInfo)
536 {
537 	DE_ASSERT(serialInfo.sizes().size() >= 2u);
538 
539 	// create buffer for top-level acceleration structure
540 	{
541 		const VkBufferCreateInfo	bufferCreateInfo	= makeBufferCreateInfo(m_storageSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
542 		m_buffer										= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
543 	}
544 
545 	// create buffers for bottom-level acceleration structures
546 	{
547 		std::vector<deUint64>	addrs;
548 
549 		for (std::size_t i = 1; i < serialInfo.addresses().size(); ++i)
550 		{
551 			const deUint64& lookAddr = serialInfo.addresses()[i];
552 			auto end = addrs.end();
553 			auto match = std::find_if(addrs.begin(), end, [&](const deUint64& item){ return item == lookAddr; });
554 			if (match == end)
555 			{
556 				addrs.emplace_back(lookAddr);
557 				m_bottoms.emplace_back(de::SharedPtr<SerialStorage>(new SerialStorage(vk, device, allocator, buildType, serialInfo.sizes()[i])));
558 			}
559 		}
560 	}
561 }
562 
getAddress(const DeviceInterface & vk,const VkDevice device,const VkAccelerationStructureBuildTypeKHR buildType)563 VkDeviceOrHostAddressKHR SerialStorage::getAddress (const DeviceInterface&						vk,
564 													const VkDevice								device,
565 													const VkAccelerationStructureBuildTypeKHR	buildType)
566 {
567 	if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
568 		return makeDeviceOrHostAddressKHR(vk, device, m_buffer->get(), 0);
569 	else
570 		return makeDeviceOrHostAddressKHR(m_buffer->getAllocation().getHostPtr());
571 }
572 
// Reinterprets the start of the host-visible storage as the serialized
// acceleration structure header.
SerialStorage::AccelerationStructureHeader* SerialStorage::getASHeader ()
{
	return reinterpret_cast<AccelerationStructureHeader*>(getHostAddress().hostAddress);
}
577 
// True when this storage was created from a SerialInfo that also describes
// bottom-level structures (i.e. more than the top-level size entry).
bool SerialStorage::hasDeepFormat () const
{
	return (m_serialInfo.sizes().size() >= 2u);
}
582 
// Returns the storage for the index-th unique bottom-level structure.
// No bounds checking here; index must be < number of stored bottoms.
de::SharedPtr<SerialStorage> SerialStorage::getBottomStorage (deUint32 index) const
{
	return m_bottoms[index];
}
587 
// Returns a host address pointing 'offset' bytes into the storage buffer.
// The offset must lie strictly inside the allocated storage.
VkDeviceOrHostAddressKHR SerialStorage::getHostAddress (VkDeviceSize offset)
{
	DE_ASSERT(offset < m_storageSize);
	return makeDeviceOrHostAddressKHR(static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr()) + offset);
}
593 
// Const-address variant of getHostAddress(). NOTE(review): unlike
// getHostAddress() this performs no offset assertion - confirm whether any
// caller relies on offsets at/past m_storageSize before adding one.
VkDeviceOrHostAddressConstKHR SerialStorage::getHostAddressConst (VkDeviceSize offset)
{
	return makeDeviceOrHostAddressConstKHR(static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr()) + offset);
}
598 
getAddressConst(const DeviceInterface & vk,const VkDevice device,const VkAccelerationStructureBuildTypeKHR buildType)599 VkDeviceOrHostAddressConstKHR SerialStorage::getAddressConst (const DeviceInterface&					vk,
600 															  const VkDevice							device,
601 															  const VkAccelerationStructureBuildTypeKHR	buildType)
602 {
603 	if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
604 		return makeDeviceOrHostAddressConstKHR(vk, device, m_buffer->get(), 0);
605 	else
606 		return getHostAddressConst();
607 }
608 
// Size in bytes of the top-level storage buffer.
inline VkDeviceSize SerialStorage::getStorageSize () const
{
	return m_storageSize;
}
613 
// The SerialInfo this storage was created from (empty for the size-only ctor).
inline const SerialInfo& SerialStorage::getSerialInfo () const
{
	return m_serialInfo;
}
618 
// Reads the deserialized-size field out of the serialized data in the
// host-visible buffer.
deUint64 SerialStorage::getDeserializedSize ()
{
	deUint64		result		= 0;
	const deUint8*	startPtr	= static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr());

	// The header field must match sizeof(deUint64) for the memcpy to be valid.
	DE_ASSERT(sizeof(result) == DESERIALIZED_SIZE_SIZE);

	deMemcpy(&result, startPtr + DESERIALIZED_SIZE_OFFSET, sizeof(result));

	return result;
}
630 
// Out-of-line destructor; no explicit cleanup needed.
BottomLevelAccelerationStructure::~BottomLevelAccelerationStructure ()
{
}
634 
// All size fields start at zero; they are filled in once the structure and
// its scratch requirements are known.
BottomLevelAccelerationStructure::BottomLevelAccelerationStructure ()
	: m_structureSize		(0u)
	, m_updateScratchSize	(0u)
	, m_buildScratchSize	(0u)
{
}
641 
setGeometryData(const std::vector<tcu::Vec3> & geometryData,const bool triangles,const VkGeometryFlagsKHR geometryFlags)642 void BottomLevelAccelerationStructure::setGeometryData (const std::vector<tcu::Vec3>&	geometryData,
643 														const bool						triangles,
644 														const VkGeometryFlagsKHR		geometryFlags)
645 {
646 	if (triangles)
647 		DE_ASSERT((geometryData.size() % 3) == 0);
648 	else
649 		DE_ASSERT((geometryData.size() % 2) == 0);
650 
651 	setGeometryCount(1u);
652 
653 	addGeometry(geometryData, triangles, geometryFlags);
654 }
655 
setDefaultGeometryData(const VkShaderStageFlagBits testStage,const VkGeometryFlagsKHR geometryFlags)656 void BottomLevelAccelerationStructure::setDefaultGeometryData (const VkShaderStageFlagBits	testStage,
657 															   const VkGeometryFlagsKHR		geometryFlags)
658 {
659 	bool					trianglesData	= false;
660 	float					z				= 0.0f;
661 	std::vector<tcu::Vec3>	geometryData;
662 
663 	switch (testStage)
664 	{
665 		case VK_SHADER_STAGE_RAYGEN_BIT_KHR:		z = -1.0f; trianglesData = true;	break;
666 		case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:		z = -1.0f; trianglesData = true;	break;
667 		case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:	z = -1.0f; trianglesData = true;	break;
668 		case VK_SHADER_STAGE_MISS_BIT_KHR:			z = -9.9f; trianglesData = true;	break;
669 		case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:	z = -1.0f; trianglesData = false;	break;
670 		case VK_SHADER_STAGE_CALLABLE_BIT_KHR:		z = -1.0f; trianglesData = true;	break;
671 		default:									TCU_THROW(InternalError, "Unacceptable stage");
672 	}
673 
674 	if (trianglesData)
675 	{
676 		geometryData.reserve(6);
677 
678 		geometryData.push_back(tcu::Vec3(-1.0f, -1.0f, z));
679 		geometryData.push_back(tcu::Vec3(-1.0f, +1.0f, z));
680 		geometryData.push_back(tcu::Vec3(+1.0f, -1.0f, z));
681 		geometryData.push_back(tcu::Vec3(-1.0f, +1.0f, z));
682 		geometryData.push_back(tcu::Vec3(+1.0f, -1.0f, z));
683 		geometryData.push_back(tcu::Vec3(+1.0f, +1.0f, z));
684 	}
685 	else
686 	{
687 		geometryData.reserve(2);
688 
689 		geometryData.push_back(tcu::Vec3(-1.0f, -1.0f, z));
690 		geometryData.push_back(tcu::Vec3(+1.0f, +1.0f, z));
691 	}
692 
693 	setGeometryCount(1u);
694 
695 	addGeometry(geometryData, trianglesData, geometryFlags);
696 }
697 
setGeometryCount(const size_t geometryCount)698 void BottomLevelAccelerationStructure::setGeometryCount (const size_t geometryCount)
699 {
700 	m_geometriesData.clear();
701 
702 	m_geometriesData.reserve(geometryCount);
703 }
704 
addGeometry(de::SharedPtr<RaytracedGeometryBase> & raytracedGeometry)705 void BottomLevelAccelerationStructure::addGeometry (de::SharedPtr<RaytracedGeometryBase>&		raytracedGeometry)
706 {
707 	m_geometriesData.push_back(raytracedGeometry);
708 }
709 
addGeometry(const std::vector<tcu::Vec3> & geometryData,const bool triangles,const VkGeometryFlagsKHR geometryFlags,const VkAccelerationStructureTrianglesOpacityMicromapEXT * opacityGeometryMicromap)710 void BottomLevelAccelerationStructure::addGeometry (const std::vector<tcu::Vec3>&	geometryData,
711 													const bool						triangles,
712 													const VkGeometryFlagsKHR		geometryFlags,
713 													const VkAccelerationStructureTrianglesOpacityMicromapEXT* opacityGeometryMicromap)
714 {
715 	DE_ASSERT(geometryData.size() > 0);
716 	DE_ASSERT((triangles && geometryData.size() % 3 == 0) || (!triangles && geometryData.size() % 2 == 0));
717 
718 	if (!triangles)
719 		for (size_t posNdx = 0; posNdx < geometryData.size() / 2; ++posNdx)
720 		{
721 			DE_ASSERT(geometryData[2 * posNdx].x() <= geometryData[2 * posNdx + 1].x());
722 			DE_ASSERT(geometryData[2 * posNdx].y() <= geometryData[2 * posNdx + 1].y());
723 			DE_ASSERT(geometryData[2 * posNdx].z() <= geometryData[2 * posNdx + 1].z());
724 		}
725 
726 	de::SharedPtr<RaytracedGeometryBase> geometry = makeRaytracedGeometry(triangles ? VK_GEOMETRY_TYPE_TRIANGLES_KHR : VK_GEOMETRY_TYPE_AABBS_KHR, VK_FORMAT_R32G32B32_SFLOAT, VK_INDEX_TYPE_NONE_KHR);
727 	for (auto it = begin(geometryData), eit = end(geometryData); it != eit; ++it)
728 		geometry->addVertex(*it);
729 
730 	geometry->setGeometryFlags(geometryFlags);
731 	if (opacityGeometryMicromap)
732 		geometry->setOpacityMicromap(opacityGeometryMicromap);
733 	addGeometry(geometry);
734 }
735 
getStructureBuildSizes() const736 VkAccelerationStructureBuildSizesInfoKHR BottomLevelAccelerationStructure::getStructureBuildSizes () const
737 {
738 	return
739 	{
740 		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,	//  VkStructureType	sType;
741 		DE_NULL,														//  const void*		pNext;
742 		m_structureSize,												//  VkDeviceSize	accelerationStructureSize;
743 		m_updateScratchSize,											//  VkDeviceSize	updateScratchSize;
744 		m_buildScratchSize												//  VkDeviceSize	buildScratchSize;
745 	};
746 };
747 
getVertexBufferSize(const std::vector<de::SharedPtr<RaytracedGeometryBase>> & geometriesData)748 VkDeviceSize getVertexBufferSize (const std::vector<de::SharedPtr<RaytracedGeometryBase>>&	geometriesData)
749 {
750 	DE_ASSERT(geometriesData.size() != 0);
751 	VkDeviceSize					bufferSizeBytes = 0;
752 	for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
753 		bufferSizeBytes += deAlignSize(geometriesData[geometryNdx]->getVertexByteSize(),8);
754 	return bufferSizeBytes;
755 }
756 
createVertexBuffer(const DeviceInterface & vk,const VkDevice device,Allocator & allocator,const VkDeviceSize bufferSizeBytes)757 BufferWithMemory* createVertexBuffer (const DeviceInterface&	vk,
758 									  const VkDevice			device,
759 									  Allocator&				allocator,
760 									  const VkDeviceSize		bufferSizeBytes)
761 {
762 	const VkBufferCreateInfo	bufferCreateInfo	= makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
763 	return new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
764 }
765 
createVertexBuffer(const DeviceInterface & vk,const VkDevice device,Allocator & allocator,const std::vector<de::SharedPtr<RaytracedGeometryBase>> & geometriesData)766 BufferWithMemory* createVertexBuffer (const DeviceInterface&									vk,
767 									  const VkDevice											device,
768 									  Allocator&												allocator,
769 									  const std::vector<de::SharedPtr<RaytracedGeometryBase>>&	geometriesData)
770 {
771 	return createVertexBuffer(vk, device, allocator, getVertexBufferSize(geometriesData));
772 }
773 
updateVertexBuffer(const DeviceInterface & vk,const VkDevice device,const std::vector<de::SharedPtr<RaytracedGeometryBase>> & geometriesData,BufferWithMemory * vertexBuffer,VkDeviceSize geometriesOffset=0)774 void updateVertexBuffer (const DeviceInterface&										vk,
775 						 const VkDevice												device,
776 						 const std::vector<de::SharedPtr<RaytracedGeometryBase>>&	geometriesData,
777 						 BufferWithMemory*											vertexBuffer,
778 						 VkDeviceSize												geometriesOffset = 0)
779 {
780 	const Allocation&				geometryAlloc		= vertexBuffer->getAllocation();
781 	deUint8*						bufferStart			= static_cast<deUint8*>(geometryAlloc.getHostPtr());
782 	VkDeviceSize					bufferOffset		= geometriesOffset;
783 
784 	for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
785 	{
786 		const void*					geometryPtr			= geometriesData[geometryNdx]->getVertexPointer();
787 		const size_t				geometryPtrSize		= geometriesData[geometryNdx]->getVertexByteSize();
788 
789 		deMemcpy(&bufferStart[bufferOffset], geometryPtr, geometryPtrSize);
790 
791 		bufferOffset += deAlignSize(geometryPtrSize,8);
792 	}
793 
794 	// Flush the whole allocation. We could flush only the interesting range, but we'd need to be sure both the offset and size
795 	// align to VkPhysicalDeviceLimits::nonCoherentAtomSize, which we are not considering. Also note most code uses Coherent memory
796 	// for the vertex and index buffers, so flushing is actually not needed.
797 	flushAlloc(vk, device, geometryAlloc);
798 }
799 
getIndexBufferSize(const std::vector<de::SharedPtr<RaytracedGeometryBase>> & geometriesData)800 VkDeviceSize getIndexBufferSize (const std::vector<de::SharedPtr<RaytracedGeometryBase>>&	geometriesData)
801 {
802 	DE_ASSERT(!geometriesData.empty());
803 
804 	VkDeviceSize	bufferSizeBytes = 0;
805 	for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
806 		if(geometriesData[geometryNdx]->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
807 			bufferSizeBytes += deAlignSize(geometriesData[geometryNdx]->getIndexByteSize(),8);
808 	return bufferSizeBytes;
809 }
810 
createIndexBuffer(const DeviceInterface & vk,const VkDevice device,Allocator & allocator,const VkDeviceSize bufferSizeBytes)811 BufferWithMemory* createIndexBuffer (const DeviceInterface&		vk,
812 									 const VkDevice				device,
813 									 Allocator&					allocator,
814 									 const VkDeviceSize			bufferSizeBytes)
815 {
816 	DE_ASSERT(bufferSizeBytes);
817 	const VkBufferCreateInfo		bufferCreateInfo	= makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
818 	return  new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
819 }
820 
createIndexBuffer(const DeviceInterface & vk,const VkDevice device,Allocator & allocator,const std::vector<de::SharedPtr<RaytracedGeometryBase>> & geometriesData)821 BufferWithMemory* createIndexBuffer (const DeviceInterface&										vk,
822 									 const VkDevice												device,
823 									 Allocator&													allocator,
824 									 const std::vector<de::SharedPtr<RaytracedGeometryBase>>&	geometriesData)
825 {
826 
827 
828 	const VkDeviceSize bufferSizeBytes = getIndexBufferSize(geometriesData);
829 	return bufferSizeBytes ? createIndexBuffer(vk, device, allocator, bufferSizeBytes) : nullptr;
830 }
831 
updateIndexBuffer(const DeviceInterface & vk,const VkDevice device,const std::vector<de::SharedPtr<RaytracedGeometryBase>> & geometriesData,BufferWithMemory * indexBuffer,VkDeviceSize geometriesOffset)832 void updateIndexBuffer (const DeviceInterface&										vk,
833 						const VkDevice												device,
834 						const std::vector<de::SharedPtr<RaytracedGeometryBase>>&	geometriesData,
835 						BufferWithMemory*											indexBuffer,
836 						VkDeviceSize												geometriesOffset)
837 {
838 	const Allocation&				indexAlloc			= indexBuffer->getAllocation();
839 	deUint8*						bufferStart			= static_cast<deUint8*>(indexAlloc.getHostPtr());
840 	VkDeviceSize					bufferOffset		= geometriesOffset;
841 
842 	for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
843 	{
844 		if (geometriesData[geometryNdx]->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
845 		{
846 			const void*					indexPtr		= geometriesData[geometryNdx]->getIndexPointer();
847 			const size_t				indexPtrSize	= geometriesData[geometryNdx]->getIndexByteSize();
848 
849 			deMemcpy(&bufferStart[bufferOffset], indexPtr, indexPtrSize);
850 
851 			bufferOffset += deAlignSize(indexPtrSize, 8);
852 		}
853 	}
854 
855 	// Flush the whole allocation. We could flush only the interesting range, but we'd need to be sure both the offset and size
856 	// align to VkPhysicalDeviceLimits::nonCoherentAtomSize, which we are not considering. Also note most code uses Coherent memory
857 	// for the vertex and index buffers, so flushing is actually not needed.
858 	flushAlloc(vk, device, indexAlloc);
859 }
860 
// VK_KHR_acceleration_structure implementation of a bottom-level acceleration
// structure. Owns the AS backing buffer, optional vertex/index input buffers
// and scratch memory, and supports host- or device-side builds, deferred
// operations, indirect builds, copy and (de)serialization.
class BottomLevelAccelerationStructureKHR : public BottomLevelAccelerationStructure
{
public:
	// Upper bound on device-memory allocations one instance may make.
	static deUint32											getRequiredAllocationCount						(void);

															BottomLevelAccelerationStructureKHR				();
															BottomLevelAccelerationStructureKHR				(const BottomLevelAccelerationStructureKHR&		other) = delete;
	virtual													~BottomLevelAccelerationStructureKHR			();

	// Configuration setters; these record state consumed later by create()/build().
	void													setBuildType									(const VkAccelerationStructureBuildTypeKHR		buildType) override;
	VkAccelerationStructureBuildTypeKHR						getBuildType									() const override;
	void													setCreateFlags									(const VkAccelerationStructureCreateFlagsKHR	createFlags) override;
	void													setCreateGeneric								(bool											createGeneric) override;
	void													setBuildFlags									(const VkBuildAccelerationStructureFlagsKHR		buildFlags) override;
	void													setBuildWithoutGeometries						(bool											buildWithoutGeometries) override;
	void													setBuildWithoutPrimitives						(bool											buildWithoutPrimitives) override;
	void													setDeferredOperation							(const bool										deferredOperation,
																											 const deUint32									workerThreadCount) override;
	void													setUseArrayOfPointers							(const bool										useArrayOfPointers) override;
	void													setIndirectBuildParameters						(const VkBuffer									indirectBuffer,
																											 const VkDeviceSize								indirectBufferOffset,
																											 const deUint32									indirectBufferStride) override;
	VkBuildAccelerationStructureFlagsKHR					getBuildFlags									() const override;

	// Create the VkAccelerationStructureKHR and its backing resources; either
	// geometries were registered (structureSize == 0) or an explicit size is
	// given (copy/compact/deserialize targets).
	void													create											(const DeviceInterface&							vk,
																											 const VkDevice									device,
																											 Allocator&										allocator,
																											 VkDeviceSize									structureSize,
																											 VkDeviceAddress								deviceAddress			= 0u,
																											 const void*									pNext					= DE_NULL,
																											 const MemoryRequirement&						addMemoryRequirement	= MemoryRequirement::Any) override;
	void													build											(const DeviceInterface&							vk,
																											 const VkDevice									device,
																											 const VkCommandBuffer							cmdBuffer) override;
	void													copyFrom										(const DeviceInterface&							vk,
																											 const VkDevice									device,
																											 const VkCommandBuffer							cmdBuffer,
																											 BottomLevelAccelerationStructure*				accelerationStructure,
																											 bool											compactCopy) override;

	void													serialize										(const DeviceInterface&							vk,
																											 const VkDevice									device,
																											 const VkCommandBuffer							cmdBuffer,
																											 SerialStorage*									storage) override;
	void													deserialize										(const DeviceInterface&							vk,
																											 const VkDevice									device,
																											 const VkCommandBuffer							cmdBuffer,
																											 SerialStorage*									storage) override;

	const VkAccelerationStructureKHR*						getPtr											(void) const override;

protected:
	// Build configuration recorded by the setters above.
	VkAccelerationStructureBuildTypeKHR						m_buildType;
	VkAccelerationStructureCreateFlagsKHR					m_createFlags;
	bool													m_createGeneric;
	VkBuildAccelerationStructureFlagsKHR					m_buildFlags;
	bool													m_buildWithoutGeometries;
	bool													m_buildWithoutPrimitives;
	bool													m_deferredOperation;
	deUint32												m_workerThreadCount;
	bool													m_useArrayOfPointers;
	// Owned resources created in create(); host scratch is used for host builds,
	// the device scratch buffer for device builds.
	de::MovePtr<BufferWithMemory>							m_accelerationStructureBuffer;
	de::MovePtr<BufferWithMemory>							m_vertexBuffer;
	de::MovePtr<BufferWithMemory>							m_indexBuffer;
	de::MovePtr<BufferWithMemory>							m_deviceScratchBuffer;
	de::UniquePtr<std::vector<deUint8>>						m_hostScratchBuffer;
	Move<VkAccelerationStructureKHR>						m_accelerationStructureKHR;
	// Non-owned parameters for indirect builds (set via setIndirectBuildParameters).
	VkBuffer												m_indirectBuffer;
	VkDeviceSize											m_indirectBufferOffset;
	deUint32												m_indirectBufferStride;

	// Translate m_geometriesData into the Vulkan geometry/range structures
	// consumed by size queries and build commands.
	void													prepareGeometries								(const DeviceInterface&												vk,
																											 const VkDevice														device,
																											 std::vector<VkAccelerationStructureGeometryKHR>&					accelerationStructureGeometriesKHR,
																											 std::vector<VkAccelerationStructureGeometryKHR*>&					accelerationStructureGeometriesKHRPointers,
																											 std::vector<VkAccelerationStructureBuildRangeInfoKHR>&				accelerationStructureBuildRangeInfoKHR,
																											 std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT>&	accelerationStructureGeometryMicromapsEXT,
																											 std::vector<deUint32>&												maxPrimitiveCounts,
																											 VkDeviceSize														vertexBufferOffset = 0,
																											 VkDeviceSize														indexBufferOffset = 0) const;

	// Accessors virtualized so derived classes can supply shared/pooled buffers
	// with non-zero offsets instead of the per-instance ones owned here.
	virtual BufferWithMemory*								getAccelerationStructureBuffer					() const { return m_accelerationStructureBuffer.get(); }
	virtual BufferWithMemory*								getDeviceScratchBuffer							() const { return m_deviceScratchBuffer.get(); }
	virtual std::vector<deUint8>*							getHostScratchBuffer							() const { return m_hostScratchBuffer.get(); }
	virtual BufferWithMemory*								getVertexBuffer									() const { return m_vertexBuffer.get(); }
	virtual BufferWithMemory*								getIndexBuffer									() const { return m_indexBuffer.get(); }

	virtual VkDeviceSize									getAccelerationStructureBufferOffset			() const { return 0; }
	virtual VkDeviceSize									getDeviceScratchBufferOffset					() const { return 0; }
	virtual VkDeviceSize									getVertexBufferOffset							() const { return 0; }
	virtual VkDeviceSize									getIndexBufferOffset							() const { return 0; }
};
953 
// Upper bound on device-memory allocations a single instance may make.
deUint32 BottomLevelAccelerationStructureKHR::getRequiredAllocationCount (void)
{
	/*
		de::MovePtr<BufferWithMemory>							m_geometryBuffer; // but only when m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR
		de::MovePtr<Allocation>									m_accelerationStructureAlloc;
		de::MovePtr<BufferWithMemory>							m_deviceScratchBuffer;
	*/
	// NOTE(review): the member names in the comment above look stale relative to
	// the class (which has m_accelerationStructureBuffer, m_vertexBuffer,
	// m_indexBuffer, m_deviceScratchBuffer) — confirm the count of 3 still holds.
	return 3u;
}
963 
// All owned resources are RAII-managed (MovePtr/Move); nothing to do explicitly.
BottomLevelAccelerationStructureKHR::~BottomLevelAccelerationStructureKHR ()
{
}
967 
// Default configuration: device-side build, no special create/build flags, no
// deferred operation, no indirect build; buffers are created later in create().
BottomLevelAccelerationStructureKHR::BottomLevelAccelerationStructureKHR ()
	: BottomLevelAccelerationStructure	()
	, m_buildType						(VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	, m_createFlags						(0u)
	, m_createGeneric					(false)
	, m_buildFlags						(0u)
	, m_buildWithoutGeometries			(false)
	, m_buildWithoutPrimitives			(false)
	, m_deferredOperation				(false)
	, m_workerThreadCount				(0)
	, m_useArrayOfPointers				(false)
	, m_accelerationStructureBuffer		(DE_NULL)
	, m_vertexBuffer					(DE_NULL)
	, m_indexBuffer						(DE_NULL)
	, m_deviceScratchBuffer				(DE_NULL)
	, m_hostScratchBuffer				(new std::vector<deUint8>)
	, m_accelerationStructureKHR		()
	, m_indirectBuffer					(DE_NULL)
	, m_indirectBufferOffset			(0)
	, m_indirectBufferStride			(0)
{
}
990 
setBuildType(const VkAccelerationStructureBuildTypeKHR buildType)991 void BottomLevelAccelerationStructureKHR::setBuildType (const VkAccelerationStructureBuildTypeKHR	buildType)
992 {
993 	m_buildType = buildType;
994 }
995 
getBuildType() const996 VkAccelerationStructureBuildTypeKHR BottomLevelAccelerationStructureKHR::getBuildType () const
997 {
998 	return m_buildType;
999 }
1000 
setCreateFlags(const VkAccelerationStructureCreateFlagsKHR createFlags)1001 void BottomLevelAccelerationStructureKHR::setCreateFlags (const VkAccelerationStructureCreateFlagsKHR	createFlags)
1002 {
1003 	m_createFlags = createFlags;
1004 }
1005 
setCreateGeneric(bool createGeneric)1006 void BottomLevelAccelerationStructureKHR::setCreateGeneric (bool createGeneric)
1007 {
1008 	m_createGeneric = createGeneric;
1009 }
1010 
setBuildFlags(const VkBuildAccelerationStructureFlagsKHR buildFlags)1011 void BottomLevelAccelerationStructureKHR::setBuildFlags (const VkBuildAccelerationStructureFlagsKHR	buildFlags)
1012 {
1013 	m_buildFlags = buildFlags;
1014 }
1015 
// When set, build() issues the build command with geometryCount forced to zero.
void BottomLevelAccelerationStructureKHR::setBuildWithoutGeometries (bool buildWithoutGeometries)
{
	m_buildWithoutGeometries = buildWithoutGeometries;
}
1020 
// When set, the structure is built with zero primitives per geometry
// (consumed by prepareGeometries — confirm against its definition).
void BottomLevelAccelerationStructureKHR::setBuildWithoutPrimitives (bool buildWithoutPrimitives)
{
	m_buildWithoutPrimitives = buildWithoutPrimitives;
}
1025 
setDeferredOperation(const bool deferredOperation,const deUint32 workerThreadCount)1026 void BottomLevelAccelerationStructureKHR::setDeferredOperation (const bool		deferredOperation,
1027 																const deUint32	workerThreadCount)
1028 {
1029 	m_deferredOperation = deferredOperation;
1030 	m_workerThreadCount = workerThreadCount;
1031 }
1032 
setUseArrayOfPointers(const bool useArrayOfPointers)1033 void BottomLevelAccelerationStructureKHR::setUseArrayOfPointers (const bool	useArrayOfPointers)
1034 {
1035 	m_useArrayOfPointers = useArrayOfPointers;
1036 }
1037 
// Configure an indirect build: build() will use
// vkCmdBuildAccelerationStructuresIndirectKHR with this buffer/offset/stride.
// The buffer is not owned by this object.
void BottomLevelAccelerationStructureKHR::setIndirectBuildParameters (const VkBuffer		indirectBuffer,
																	  const VkDeviceSize	indirectBufferOffset,
																	  const deUint32		indirectBufferStride)
{
	m_indirectBuffer		= indirectBuffer;
	m_indirectBufferOffset	= indirectBufferOffset;
	m_indirectBufferStride	= indirectBufferStride;
}
1046 
getBuildFlags() const1047 VkBuildAccelerationStructureFlagsKHR BottomLevelAccelerationStructureKHR::getBuildFlags () const
1048 {
1049 	return m_buildFlags;
1050 }
1051 
create(const DeviceInterface & vk,const VkDevice device,Allocator & allocator,VkDeviceSize structureSize,VkDeviceAddress deviceAddress,const void * pNext,const MemoryRequirement & addMemoryRequirement)1052 void BottomLevelAccelerationStructureKHR::create (const DeviceInterface&				vk,
1053 												  const VkDevice						device,
1054 												  Allocator&							allocator,
1055 												  VkDeviceSize							structureSize,
1056 												  VkDeviceAddress						deviceAddress,
1057 												  const void*							pNext,
1058 												  const MemoryRequirement&				addMemoryRequirement)
1059 {
1060 	// AS may be built from geometries using vkCmdBuildAccelerationStructuresKHR / vkBuildAccelerationStructuresKHR
1061 	// or may be copied/compacted/deserialized from other AS ( in this case AS does not need geometries, but it needs to know its size before creation ).
1062 	DE_ASSERT(!m_geometriesData.empty() !=  !(structureSize == 0)); // logical xor
1063 
1064 	if (structureSize == 0)
1065 	{
1066 		std::vector<VkAccelerationStructureGeometryKHR>			accelerationStructureGeometriesKHR;
1067 		std::vector<VkAccelerationStructureGeometryKHR*>		accelerationStructureGeometriesKHRPointers;
1068 		std::vector<VkAccelerationStructureBuildRangeInfoKHR>	accelerationStructureBuildRangeInfoKHR;
1069 		std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT> accelerationStructureGeometryMicromapsEXT;
1070 		std::vector<deUint32>									maxPrimitiveCounts;
1071 		prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers, accelerationStructureBuildRangeInfoKHR, accelerationStructureGeometryMicromapsEXT, maxPrimitiveCounts);
1072 
1073 		const VkAccelerationStructureGeometryKHR*				accelerationStructureGeometriesKHRPointer	= accelerationStructureGeometriesKHR.data();
1074 		const VkAccelerationStructureGeometryKHR* const*		accelerationStructureGeometry				= accelerationStructureGeometriesKHRPointers.data();
1075 
1076 		VkAccelerationStructureBuildGeometryInfoKHR	accelerationStructureBuildGeometryInfoKHR	=
1077 		{
1078 			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,			//  VkStructureType										sType;
1079 			DE_NULL,																	//  const void*											pNext;
1080 			VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,							//  VkAccelerationStructureTypeKHR						type;
1081 			m_buildFlags,																//  VkBuildAccelerationStructureFlagsKHR				flags;
1082 			VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,								//  VkBuildAccelerationStructureModeKHR					mode;
1083 			DE_NULL,																	//  VkAccelerationStructureKHR							srcAccelerationStructure;
1084 			DE_NULL,																	//  VkAccelerationStructureKHR							dstAccelerationStructure;
1085 			static_cast<deUint32>(accelerationStructureGeometriesKHR.size()),			//  deUint32											geometryCount;
1086 			m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer,	//  const VkAccelerationStructureGeometryKHR*			pGeometries;
1087 			m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL,				//  const VkAccelerationStructureGeometryKHR* const*	ppGeometries;
1088 			makeDeviceOrHostAddressKHR(DE_NULL)											//  VkDeviceOrHostAddressKHR							scratchData;
1089 		};
1090 		VkAccelerationStructureBuildSizesInfoKHR sizeInfo =
1091 		{
1092 			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,	//  VkStructureType	sType;
1093 			DE_NULL,														//  const void*		pNext;
1094 			0,																//  VkDeviceSize	accelerationStructureSize;
1095 			0,																//  VkDeviceSize	updateScratchSize;
1096 			0																//  VkDeviceSize	buildScratchSize;
1097 		};
1098 
1099 		vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);
1100 
1101 		m_structureSize		= sizeInfo.accelerationStructureSize;
1102 		m_updateScratchSize	= sizeInfo.updateScratchSize;
1103 		m_buildScratchSize	= sizeInfo.buildScratchSize;
1104 	}
1105 	else
1106 	{
1107 		m_structureSize		= structureSize;
1108 		m_updateScratchSize	= 0u;
1109 		m_buildScratchSize	= 0u;
1110 	}
1111 
1112 	{
1113 		const VkBufferCreateInfo		bufferCreateInfo		= makeBufferCreateInfo(m_structureSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
1114 		const MemoryRequirement			memoryRequirement		= addMemoryRequirement | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress;
1115 
1116 		try
1117 		{
1118 			m_accelerationStructureBuffer	= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | memoryRequirement));
1119 		}
1120 		catch (const tcu::NotSupportedError&)
1121 		{
1122 			// retry without Cached flag
1123 			m_accelerationStructureBuffer	= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, memoryRequirement));
1124 		}
1125 	}
1126 
1127 	{
1128 		const VkAccelerationStructureTypeKHR		structureType						= (m_createGeneric
1129 																						   ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
1130 																						   : VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
1131 		const VkAccelerationStructureCreateInfoKHR	accelerationStructureCreateInfoKHR
1132 		{
1133 			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,						//  VkStructureType											sType;
1134 			pNext,																			//  const void*												pNext;
1135 			m_createFlags,																	//  VkAccelerationStructureCreateFlagsKHR					createFlags;
1136 			getAccelerationStructureBuffer()->get(),										//  VkBuffer												buffer;
1137 			getAccelerationStructureBufferOffset(),											//  VkDeviceSize											offset;
1138 			m_structureSize,																//  VkDeviceSize											size;
1139 			structureType,																	//  VkAccelerationStructureTypeKHR							type;
1140 			deviceAddress																	//  VkDeviceAddress											deviceAddress;
1141 		};
1142 
1143 		m_accelerationStructureKHR	= createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);
1144 	}
1145 
1146 	if (m_buildScratchSize > 0u)
1147 	{
1148 		if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1149 		{
1150 			const VkBufferCreateInfo		bufferCreateInfo = makeBufferCreateInfo(m_buildScratchSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
1151 			m_deviceScratchBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
1152 		}
1153 		else
1154 		{
1155 			m_hostScratchBuffer->resize(static_cast<size_t>(m_buildScratchSize));
1156 		}
1157 	}
1158 
1159 	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR && !m_geometriesData.empty())
1160 	{
1161 		m_vertexBuffer	= de::MovePtr<BufferWithMemory>(createVertexBuffer(vk, device, allocator, m_geometriesData));
1162 		m_indexBuffer	= de::MovePtr<BufferWithMemory>(createIndexBuffer(vk, device, allocator, m_geometriesData));
1163 	}
1164 }
1165 
build(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer)1166 void BottomLevelAccelerationStructureKHR::build (const DeviceInterface&						vk,
1167 												 const VkDevice								device,
1168 												 const VkCommandBuffer						cmdBuffer)
1169 {
1170 	DE_ASSERT(!m_geometriesData.empty());
1171 	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1172 	DE_ASSERT(m_buildScratchSize != 0);
1173 
1174 	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1175 	{
1176 		updateVertexBuffer(vk, device, m_geometriesData,  getVertexBuffer(), getVertexBufferOffset());
1177 		if(getIndexBuffer() != DE_NULL)
1178 			updateIndexBuffer(vk, device, m_geometriesData, getIndexBuffer(), getIndexBufferOffset());
1179 	}
1180 
1181 	{
1182 		std::vector<VkAccelerationStructureGeometryKHR>			accelerationStructureGeometriesKHR;
1183 		std::vector<VkAccelerationStructureGeometryKHR*>		accelerationStructureGeometriesKHRPointers;
1184 		std::vector<VkAccelerationStructureBuildRangeInfoKHR>	accelerationStructureBuildRangeInfoKHR;
1185 		std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT> accelerationStructureGeometryMicromapsEXT;
1186 		std::vector<deUint32>									maxPrimitiveCounts;
1187 
1188 		prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers,
1189 						  accelerationStructureBuildRangeInfoKHR, accelerationStructureGeometryMicromapsEXT, maxPrimitiveCounts, getVertexBufferOffset(), getIndexBufferOffset());
1190 
1191 		const VkAccelerationStructureGeometryKHR*			accelerationStructureGeometriesKHRPointer	= accelerationStructureGeometriesKHR.data();
1192 		const VkAccelerationStructureGeometryKHR* const*	accelerationStructureGeometry				= accelerationStructureGeometriesKHRPointers.data();
1193 		VkDeviceOrHostAddressKHR							scratchData									= (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1194 																										? makeDeviceOrHostAddressKHR(vk, device, getDeviceScratchBuffer()->get(), getDeviceScratchBufferOffset())
1195 																										: makeDeviceOrHostAddressKHR(getHostScratchBuffer()->data());
1196 		const deUint32										geometryCount								= (m_buildWithoutGeometries
1197 																										? 0u
1198 																										: static_cast<deUint32>(accelerationStructureGeometriesKHR.size()));
1199 
1200 		VkAccelerationStructureBuildGeometryInfoKHR	accelerationStructureBuildGeometryInfoKHR	=
1201 		{
1202 			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,			//  VkStructureType										sType;
1203 			DE_NULL,																	//  const void*											pNext;
1204 			VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,							//  VkAccelerationStructureTypeKHR						type;
1205 			m_buildFlags,																//  VkBuildAccelerationStructureFlagsKHR				flags;
1206 			VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,								//  VkBuildAccelerationStructureModeKHR					mode;
1207 			DE_NULL,																	//  VkAccelerationStructureKHR							srcAccelerationStructure;
1208 			m_accelerationStructureKHR.get(),											//  VkAccelerationStructureKHR							dstAccelerationStructure;
1209 			geometryCount,																//  deUint32											geometryCount;
1210 			m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer,	//  const VkAccelerationStructureGeometryKHR*			pGeometries;
1211 			m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL,				//  const VkAccelerationStructureGeometryKHR* const*	ppGeometries;
1212 			scratchData																	//  VkDeviceOrHostAddressKHR							scratchData;
1213 		};
1214 
1215 		VkAccelerationStructureBuildRangeInfoKHR* accelerationStructureBuildRangeInfoKHRPtr	= accelerationStructureBuildRangeInfoKHR.data();
1216 
1217 		if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1218 		{
1219 			if (m_indirectBuffer == DE_NULL)
1220 				vk.cmdBuildAccelerationStructuresKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
1221 			else
1222 			{
1223 				VkDeviceAddress	indirectDeviceAddress	= getBufferDeviceAddress(vk, device, m_indirectBuffer, m_indirectBufferOffset);
1224 				deUint32*		pMaxPrimitiveCounts		= maxPrimitiveCounts.data();
1225 				vk.cmdBuildAccelerationStructuresIndirectKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, &indirectDeviceAddress, &m_indirectBufferStride, &pMaxPrimitiveCounts);
1226 			}
1227 		}
1228 		else if (!m_deferredOperation)
1229 		{
1230 			VK_CHECK(vk.buildAccelerationStructuresKHR(device, DE_NULL, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr));
1231 		}
1232 		else
1233 		{
1234 			const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
1235 			const auto deferredOperation	= deferredOperationPtr.get();
1236 
1237 			VkResult result = vk.buildAccelerationStructuresKHR(device, deferredOperation, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
1238 
1239 			DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1240 
1241 			finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1242 		}
1243 	}
1244 
1245 	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1246 	{
1247 		const VkAccessFlags		accessMasks	= VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
1248 		const VkMemoryBarrier	memBarrier	= makeMemoryBarrier(accessMasks, accessMasks);
1249 
1250 		cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
1251 	}
1252 }
1253 
// Copies the contents of another, already built BLAS into this one.
// For device builds the copy is recorded into cmdBuffer (executes at submit time);
// for host builds it is performed immediately, optionally through a deferred host
// operation when m_deferredOperation is set. compactCopy selects COMPACT mode
// (source must allow compaction) instead of a plain CLONE.
void BottomLevelAccelerationStructureKHR::copyFrom (const DeviceInterface&						vk,
													const VkDevice								device,
													const VkCommandBuffer						cmdBuffer,
													BottomLevelAccelerationStructure*			accelerationStructure,
													bool										compactCopy)
{
	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
	DE_ASSERT(accelerationStructure != DE_NULL);

	VkCopyAccelerationStructureInfoKHR copyAccelerationStructureInfo =
	{
		VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR,															// VkStructureType						sType;
		DE_NULL,																										// const void*							pNext;
		*(accelerationStructure->getPtr()),																				// VkAccelerationStructureKHR			src;
		*(getPtr()),																									// VkAccelerationStructureKHR			dst;
		compactCopy ? VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR : VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR	// VkCopyAccelerationStructureModeKHR	mode;
	};

	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		// Device path: record the copy into the command buffer.
		vk.cmdCopyAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
	}
	else if (!m_deferredOperation)
	{
		// Host path, immediate: no deferred operation handle is passed.
		VK_CHECK(vk.copyAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
	}
	else
	{
		// Host path, deferred: start the copy through a deferred operation and join it,
		// possibly on m_workerThreadCount worker threads.
		const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
		const auto deferredOperation	= deferredOperationPtr.get();

		VkResult result = vk.copyAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);

		// Any of these results is acceptable for a deferrable operation.
		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);

		finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
	}

	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		// Make the recorded device-side copy visible to all subsequent commands.
		const VkAccessFlags		accessMasks	= VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
		const VkMemoryBarrier	memBarrier	= makeMemoryBarrier(accessMasks, accessMasks);

		cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
	}
}
1300 
// Serializes this (already built) acceleration structure into the caller-provided
// storage using VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR. Device builds
// record into cmdBuffer; host builds execute immediately or via a deferred operation.
void BottomLevelAccelerationStructureKHR::serialize (const DeviceInterface&		vk,
													 const VkDevice				device,
													 const VkCommandBuffer		cmdBuffer,
													 SerialStorage*				storage)
{
	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
	DE_ASSERT(storage != DE_NULL);

	const VkCopyAccelerationStructureToMemoryInfoKHR	copyAccelerationStructureInfo	=
	{
		VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_INFO_KHR,	// VkStructureType						sType;
		DE_NULL,															// const void*							pNext;
		*(getPtr()),														// VkAccelerationStructureKHR			src;
		storage->getAddress(vk, device, m_buildType),						// VkDeviceOrHostAddressKHR				dst;
		VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR					// VkCopyAccelerationStructureModeKHR	mode;
	};

	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		// Device path: record the serialization; it executes at submit time.
		vk.cmdCopyAccelerationStructureToMemoryKHR(cmdBuffer, &copyAccelerationStructureInfo);
	}
	else if (!m_deferredOperation)
	{
		// Host path, immediate.
		VK_CHECK(vk.copyAccelerationStructureToMemoryKHR(device, DE_NULL, &copyAccelerationStructureInfo));
	}
	else
	{
		// Host path through a deferred operation; joined before returning.
		const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
		const auto deferredOperation	= deferredOperationPtr.get();

		const VkResult result = vk.copyAccelerationStructureToMemoryKHR(device, deferredOperation, &copyAccelerationStructureInfo);

		// Any of these results is acceptable for a deferrable operation.
		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);

		finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
	}
}
1338 
// Restores this acceleration structure from previously serialized storage using
// VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR. Device builds record into
// cmdBuffer and are followed by a memory barrier; host builds execute immediately
// or via a deferred operation.
void BottomLevelAccelerationStructureKHR::deserialize (const DeviceInterface&	vk,
													   const VkDevice			device,
													   const VkCommandBuffer	cmdBuffer,
													   SerialStorage*			storage)
{
	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
	DE_ASSERT(storage != DE_NULL);

	const VkCopyMemoryToAccelerationStructureInfoKHR	copyAccelerationStructureInfo	=
	{
		VK_STRUCTURE_TYPE_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_INFO_KHR,	// VkStructureType							sType;
		DE_NULL,															// const void*								pNext;
		storage->getAddressConst(vk, device, m_buildType),					// VkDeviceOrHostAddressConstKHR			src;
		*(getPtr()),														// VkAccelerationStructureKHR				dst;
		VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR					// VkCopyAccelerationStructureModeKHR		mode;
	};

	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		// Device path: record the deserialization; it executes at submit time.
		vk.cmdCopyMemoryToAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
	}
	else if (!m_deferredOperation)
	{
		// Host path, immediate.
		VK_CHECK(vk.copyMemoryToAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
	}
	else
	{
		// Host path through a deferred operation; joined before returning.
		const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
		const auto deferredOperation	= deferredOperationPtr.get();

		const VkResult result = vk.copyMemoryToAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);

		// Any of these results is acceptable for a deferrable operation.
		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);

		finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
	}

	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		// Make the restored structure visible to all subsequent commands.
		const VkAccessFlags		accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
		const VkMemoryBarrier	memBarrier = makeMemoryBarrier(accessMasks, accessMasks);

		cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
	}
}
1384 
// Returns the address of the internally held VkAccelerationStructureKHR handle.
const VkAccelerationStructureKHR* BottomLevelAccelerationStructureKHR::getPtr (void) const
{
	return &m_accelerationStructureKHR.get();
}
1389 
// Fills all per-geometry arrays consumed by the build commands: geometry
// descriptions, the pointer table to them, build-range infos and max primitive
// counts. For device builds, addresses into the shared vertex/index buffers are
// computed starting at vertexBufferOffset/indexBufferOffset and advanced per
// geometry (8-byte aligned); for host builds the geometries' host pointers are
// used directly.
void BottomLevelAccelerationStructureKHR::prepareGeometries (const DeviceInterface&												vk,
															 const VkDevice														device,
															 std::vector<VkAccelerationStructureGeometryKHR>&					accelerationStructureGeometriesKHR,
															 std::vector<VkAccelerationStructureGeometryKHR*>&					accelerationStructureGeometriesKHRPointers,
															 std::vector<VkAccelerationStructureBuildRangeInfoKHR>&				accelerationStructureBuildRangeInfoKHR,
															 std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT>&	accelerationStructureGeometryMicromapsEXT,
															 std::vector<deUint32>&												maxPrimitiveCounts,
															 VkDeviceSize														vertexBufferOffset,
															 VkDeviceSize														indexBufferOffset) const
{
	// One entry per geometry in each output array.
	// NOTE(review): accelerationStructureGeometryMicromapsEXT is only resized here;
	// its entries are not written in this function - confirm that is intended.
	accelerationStructureGeometriesKHR.resize(m_geometriesData.size());
	accelerationStructureGeometriesKHRPointers.resize(m_geometriesData.size());
	accelerationStructureBuildRangeInfoKHR.resize(m_geometriesData.size());
	accelerationStructureGeometryMicromapsEXT.resize(m_geometriesData.size());
	maxPrimitiveCounts.resize(m_geometriesData.size());

	for (size_t geometryNdx = 0; geometryNdx < m_geometriesData.size(); ++geometryNdx)
	{
		const de::SharedPtr<RaytracedGeometryBase>&				geometryData = m_geometriesData[geometryNdx];
		VkDeviceOrHostAddressConstKHR							vertexData, indexData;
		if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
		{
			// Device build: addresses point into the shared vertex/index buffers.
			if (getVertexBuffer() != DE_NULL)
			{
				vertexData			= makeDeviceOrHostAddressConstKHR(vk, device, getVertexBuffer()->get(), vertexBufferOffset);
				// Indirect builds keep a fixed offset; otherwise advance past this geometry's vertices.
				if (m_indirectBuffer == DE_NULL )
				{
					vertexBufferOffset	+= deAlignSize(geometryData->getVertexByteSize(), 8);
				}
			}
			else
				vertexData			= makeDeviceOrHostAddressConstKHR(DE_NULL);

			if (getIndexBuffer() != DE_NULL &&  geometryData->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
			{
				indexData			= makeDeviceOrHostAddressConstKHR(vk, device, getIndexBuffer()->get(), indexBufferOffset);
				indexBufferOffset	+= deAlignSize(geometryData->getIndexByteSize(), 8);
			}
			else
				indexData = makeDeviceOrHostAddressConstKHR(DE_NULL);
		}
		else
		{
			// Host build: use the geometry's host-memory pointers directly.
			vertexData = makeDeviceOrHostAddressConstKHR(geometryData->getVertexPointer());
			if (geometryData->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
				indexData = makeDeviceOrHostAddressConstKHR(geometryData->getIndexPointer());
			else
				indexData = makeDeviceOrHostAddressConstKHR(DE_NULL);
		}

		VkAccelerationStructureGeometryTrianglesDataKHR	accelerationStructureGeometryTrianglesDataKHR =
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR,	//  VkStructureType					sType;
			DE_NULL,																//  const void*						pNext;
			geometryData->getVertexFormat(),										//  VkFormat						vertexFormat;
			vertexData,																//  VkDeviceOrHostAddressConstKHR	vertexData;
			geometryData->getVertexStride(),										//  VkDeviceSize					vertexStride;
			static_cast<deUint32>(geometryData->getVertexCount()),					//  uint32_t						maxVertex;
			geometryData->getIndexType(),											//  VkIndexType						indexType;
			indexData,																//  VkDeviceOrHostAddressConstKHR	indexData;
			makeDeviceOrHostAddressConstKHR(DE_NULL),								//  VkDeviceOrHostAddressConstKHR	transformData;
		};

		// Chain the geometry's opacity micromap description into pNext when present.
		if (geometryData->getHasOpacityMicromap())
			accelerationStructureGeometryTrianglesDataKHR.pNext = &geometryData->getOpacityMicromap();

		const VkAccelerationStructureGeometryAabbsDataKHR		accelerationStructureGeometryAabbsDataKHR =
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR,	//  VkStructureType					sType;
			DE_NULL,															//  const void*						pNext;
			vertexData,															//  VkDeviceOrHostAddressConstKHR	data;
			geometryData->getAABBStride()										//  VkDeviceSize					stride;
		};
		// Triangles and AABBs share one union; pick the member matching the geometry type.
		const VkAccelerationStructureGeometryDataKHR			geometry = (geometryData->isTrianglesType())
																		 ? makeVkAccelerationStructureGeometryDataKHR(accelerationStructureGeometryTrianglesDataKHR)
																		 : makeVkAccelerationStructureGeometryDataKHR(accelerationStructureGeometryAabbsDataKHR);
		const VkAccelerationStructureGeometryKHR				accelerationStructureGeometryKHR =
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,	//  VkStructureType							sType;
			DE_NULL,												//  const void*								pNext;
			geometryData->getGeometryType(),						//  VkGeometryTypeKHR						geometryType;
			geometry,												//  VkAccelerationStructureGeometryDataKHR	geometry;
			geometryData->getGeometryFlags()						//  VkGeometryFlagsKHR						flags;
		};

		// Build ranges may deliberately report zero primitives when testing builds
		// without primitives, but maxPrimitiveCounts always carries the real count.
		const deUint32 primitiveCount = (m_buildWithoutPrimitives ? 0u : geometryData->getPrimitiveCount());

		const VkAccelerationStructureBuildRangeInfoKHR			accelerationStructureBuildRangeInfosKHR =
		{
			primitiveCount,	//  deUint32	primitiveCount;
			0,				//  deUint32	primitiveOffset;
			0,				//  deUint32	firstVertex;
			0				//  deUint32	firstTransform;
		};

		accelerationStructureGeometriesKHR[geometryNdx]			= accelerationStructureGeometryKHR;
		accelerationStructureGeometriesKHRPointers[geometryNdx]	= &accelerationStructureGeometriesKHR[geometryNdx];
		accelerationStructureBuildRangeInfoKHR[geometryNdx]		= accelerationStructureBuildRangeInfosKHR;
		maxPrimitiveCounts[geometryNdx]							= geometryData->getPrimitiveCount();
	}
}
1491 
// Forwards to the KHR implementation's allocation-count query.
deUint32 BottomLevelAccelerationStructure::getRequiredAllocationCount (void)
{
	return BottomLevelAccelerationStructureKHR::getRequiredAllocationCount();
}
1496 
// Convenience helper: create the acceleration structure (structureSize 0 lets the
// implementation size it from the attached geometry) and immediately build it.
void BottomLevelAccelerationStructure::createAndBuild (const DeviceInterface&	vk,
													   const VkDevice			device,
													   const VkCommandBuffer	cmdBuffer,
													   Allocator&				allocator,
													   VkDeviceAddress			deviceAddress)
{
	create(vk, device, allocator, 0u, deviceAddress);
	build(vk, device, cmdBuffer);
}
1506 
createAndCopyFrom(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,Allocator & allocator,BottomLevelAccelerationStructure * accelerationStructure,VkDeviceSize compactCopySize,VkDeviceAddress deviceAddress)1507 void BottomLevelAccelerationStructure::createAndCopyFrom (const DeviceInterface&				vk,
1508 														  const VkDevice						device,
1509 														  const VkCommandBuffer					cmdBuffer,
1510 														  Allocator&							allocator,
1511 														  BottomLevelAccelerationStructure*		accelerationStructure,
1512 														  VkDeviceSize							compactCopySize,
1513 														  VkDeviceAddress						deviceAddress)
1514 {
1515 	DE_ASSERT(accelerationStructure != NULL);
1516 	VkDeviceSize copiedSize = compactCopySize > 0u ? compactCopySize : accelerationStructure->getStructureBuildSizes().accelerationStructureSize;
1517 	DE_ASSERT(copiedSize != 0u);
1518 
1519 	create(vk, device, allocator, copiedSize, deviceAddress);
1520 	copyFrom(vk, device, cmdBuffer, accelerationStructure, compactCopySize > 0u);
1521 }
1522 
createAndDeserializeFrom(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,Allocator & allocator,SerialStorage * storage,VkDeviceAddress deviceAddress)1523 void BottomLevelAccelerationStructure::createAndDeserializeFrom (const DeviceInterface& vk,
1524 																 const VkDevice								device,
1525 																 const VkCommandBuffer						cmdBuffer,
1526 																 Allocator&									allocator,
1527 																 SerialStorage*								storage,
1528 																 VkDeviceAddress							deviceAddress )
1529 {
1530 	DE_ASSERT(storage != NULL);
1531 	DE_ASSERT(storage->getStorageSize() >= SerialStorage::SERIAL_STORAGE_SIZE_MIN);
1532 	create(vk, device, allocator, storage->getDeserializedSize(), deviceAddress);
1533 	deserialize(vk, device, cmdBuffer, storage);
1534 }
1535 
// Factory returning the default (KHR) bottom-level acceleration structure implementation.
de::MovePtr<BottomLevelAccelerationStructure> makeBottomLevelAccelerationStructure ()
{
	return de::MovePtr<BottomLevelAccelerationStructure>(new BottomLevelAccelerationStructureKHR);
}
1540 
1541 // Forward declaration
1542 struct BottomLevelAccelerationStructurePoolImpl;
1543 
// A BLAS that does not own its buffers: all storage (structure, scratch, vertex and
// index data) lives in the shared pool, and this member only records its batch
// indices and byte offsets into those pooled buffers (see struct Info).
class BottomLevelAccelerationStructurePoolMember : public BottomLevelAccelerationStructureKHR
{
public:
	friend class BottomLevelAccelerationStructurePool;

								BottomLevelAccelerationStructurePoolMember	(BottomLevelAccelerationStructurePoolImpl& pool);
								BottomLevelAccelerationStructurePoolMember	(const BottomLevelAccelerationStructurePoolMember&) = delete;
								BottomLevelAccelerationStructurePoolMember	(BottomLevelAccelerationStructurePoolMember&&) = delete;
	virtual						~BottomLevelAccelerationStructurePoolMember	() = default;

	// Pool members must not be created individually; creation goes through the pool.
	virtual void				create										(const DeviceInterface&,
																			 const VkDevice,
																			 Allocator&,
																			 VkDeviceSize,
																			 VkDeviceAddress,
																			 const void*,
																			 const MemoryRequirement&) override
								{
									DE_ASSERT(0); // Silence this method
								}
	virtual auto				computeBuildSize							(const DeviceInterface&	vk,
																			 const VkDevice			device,
																			 const VkDeviceSize		strSize) const
																			 //              accStrSize,updateScratch, buildScratch, vertexSize,   indexSize
																			 -> std::tuple<VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize>;
protected:
	struct Info;
	virtual void				preCreateSetSizesAndOffsets					(const Info&			info,
																			 const VkDeviceSize		accStrSize,
																			 const VkDeviceSize		updateScratchSize,
																			 const VkDeviceSize		buildScratchSize);
	virtual void				createAccellerationStructure				(const DeviceInterface&	vk,
																			 const VkDevice			device,
																			 VkDeviceAddress		deviceAddress);

	// Buffer accessors resolve through the pool using the indices stored in m_info.
	virtual BufferWithMemory*	getAccelerationStructureBuffer				() const override;
	virtual BufferWithMemory*	getDeviceScratchBuffer						() const override;
	virtual std::vector<deUint8>*	getHostScratchBuffer					() const override;
	virtual BufferWithMemory*	getVertexBuffer								() const override;
	virtual BufferWithMemory*	getIndexBuffer								() const override;

	virtual VkDeviceSize		getAccelerationStructureBufferOffset		() const override { return m_info.accStrOffset; }
	virtual VkDeviceSize		getDeviceScratchBufferOffset				() const override { return m_info.buildScratchBuffOffset; }
	virtual VkDeviceSize		getVertexBufferOffset						() const override { return m_info.vertBuffOffset; }
	virtual VkDeviceSize		getIndexBufferOffset						() const override { return m_info.indexBuffOffset; }

	BottomLevelAccelerationStructurePoolImpl&	m_pool;

	// Placement of this member within the pool's batched buffers:
	// *Index selects the pooled buffer, *Offset the byte offset inside it.
	struct Info
	{
		deUint32				accStrIndex;
		VkDeviceSize			accStrOffset;
		deUint32				vertBuffIndex;
		VkDeviceSize			vertBuffOffset;
		deUint32				indexBuffIndex;
		VkDeviceSize			indexBuffOffset;
		deUint32				buildScratchBuffIndex;
		VkDeviceSize			buildScratchBuffOffset;
	}											m_info;
};
1604 
// All-ones bit pattern of X, used by the pool code as an "invalid index" sentinel.
// The (unused) argument only exists so X can be deduced at the call site.
template<class X> inline X negz (const X&)
{
	X zero {};
	return static_cast<X>(~zero);
}
// True iff x holds the all-ones sentinel value produced by negz().
template<class X> inline bool isnegz (const X& x)
{
	return negz(x) == x;
}
make_unsigned(const Y & y)1613 template<class Y> inline auto make_unsigned(const Y& y) -> typename std::make_unsigned<Y>::type
1614 {
1615 	return static_cast<typename std::make_unsigned<Y>::type>(y);
1616 }
1617 
// Binds this member to its pool; m_info is value-initialized (all indices/offsets
// zero) and filled in later during pool creation.
BottomLevelAccelerationStructurePoolMember::BottomLevelAccelerationStructurePoolMember	(BottomLevelAccelerationStructurePoolImpl& pool)
	: m_pool	(pool)
	, m_info	{}
{
}
1623 
// Shared storage backing every pool member: batched buffers for acceleration
// structures, vertices and indices, plus one device scratch buffer and one host
// scratch vector reused by all members. Non-copyable and non-movable.
struct BottomLevelAccelerationStructurePoolImpl
{
	BottomLevelAccelerationStructurePoolImpl (BottomLevelAccelerationStructurePoolImpl&&) = delete;
	BottomLevelAccelerationStructurePoolImpl (const BottomLevelAccelerationStructurePoolImpl&) = delete;
	BottomLevelAccelerationStructurePoolImpl (BottomLevelAccelerationStructurePool& pool);

	BottomLevelAccelerationStructurePool&			m_pool;								// back-reference to the owning pool
	std::vector<de::SharedPtr<BufferWithMemory>>	m_accellerationStructureBuffers;	// batched acceleration-structure storage
	de::SharedPtr<BufferWithMemory>					m_deviceScratchBuffer;				// single scratch buffer shared by device builds
	de::UniquePtr<std::vector<deUint8>>				m_hostScratchBuffer;				// single scratch vector shared by host builds
	std::vector<de::SharedPtr<BufferWithMemory>>	m_vertexBuffers;					// batched vertex data buffers
	std::vector<de::SharedPtr<BufferWithMemory>>	m_indexBuffers;						// batched index data buffers
};
// All buffer collections start empty; only the host scratch vector is allocated
// up front (empty), so getHostScratchBuffer() always has a valid object to return.
BottomLevelAccelerationStructurePoolImpl::BottomLevelAccelerationStructurePoolImpl (BottomLevelAccelerationStructurePool& pool)
	: m_pool							(pool)
	, m_accellerationStructureBuffers	()
	, m_deviceScratchBuffer				()
	, m_hostScratchBuffer				(new std::vector<deUint8>)
	, m_vertexBuffers					()
	, m_indexBuffers					()
{
}
getAccelerationStructureBuffer() const1646 BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getAccelerationStructureBuffer () const
1647 {
1648 	BufferWithMemory* result = nullptr;
1649 	if (m_pool.m_accellerationStructureBuffers.size())
1650 	{
1651 		DE_ASSERT(!isnegz(m_info.accStrIndex));
1652 		result = m_pool.m_accellerationStructureBuffers[m_info.accStrIndex].get();
1653 	}
1654 	return result;
1655 }
// The pool keeps exactly one shared device scratch buffer, so the stored batch
// index must always be 0.
BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getDeviceScratchBuffer () const
{
	DE_ASSERT(m_info.buildScratchBuffIndex == 0);
	return m_pool.m_deviceScratchBuffer.get();
}
// Returns the pool's shared host scratch vector, or null when this member needs
// no build scratch space (m_buildScratchSize == 0).
std::vector<deUint8>* BottomLevelAccelerationStructurePoolMember::getHostScratchBuffer () const
{
	return this->m_buildScratchSize ? m_pool.m_hostScratchBuffer.get() : nullptr;
}
1665 
getVertexBuffer() const1666 BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getVertexBuffer () const
1667 {
1668 	BufferWithMemory* result = nullptr;
1669 	if (m_pool.m_vertexBuffers.size())
1670 	{
1671 		DE_ASSERT(!isnegz(m_info.vertBuffIndex));
1672 		result = m_pool.m_vertexBuffers[m_info.vertBuffIndex].get();
1673 	}
1674 	return result;
1675 }
getIndexBuffer() const1676 BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getIndexBuffer () const
1677 {
1678 	BufferWithMemory* result = nullptr;
1679 	if (m_pool.m_indexBuffers.size())
1680 	{
1681 		DE_ASSERT(!isnegz(m_info.indexBuffIndex));
1682 		result = m_pool.m_indexBuffers[m_info.indexBuffIndex].get();
1683 	}
1684 	return result;
1685 }
1686 
// Thin concrete wrapper exposing BottomLevelAccelerationStructurePoolImpl as the
// pool's private implementation type (pimpl).
struct BottomLevelAccelerationStructurePool::Impl : BottomLevelAccelerationStructurePoolImpl
{
	friend class BottomLevelAccelerationStructurePool;
	friend class BottomLevelAccelerationStructurePoolMember;

	Impl (BottomLevelAccelerationStructurePool& pool)
		: BottomLevelAccelerationStructurePoolImpl(pool) { }
};
1695 
// Defaults: 4 structures per batch, geometry batch count 0 (meaning "follow the
// structure batch count"), cached memory preferred, and all accumulated sizes
// zeroed until the pool's buffers are created.
BottomLevelAccelerationStructurePool::BottomLevelAccelerationStructurePool ()
	: m_batchStructCount	(4)
	, m_batchGeomCount		(0)
	, m_infos				()
	, m_structs				()
	, m_createOnce			(false)
	, m_tryCachedMemory		(true)
	, m_structsBuffSize		(0)
	, m_updatesScratchSize	(0)
	, m_buildsScratchSize	(0)
	, m_verticesSize		(0)
	, m_indicesSize			(0)
	, m_impl				(new Impl(*this))
{
}
1711 
// m_impl is a raw owning pointer (pimpl); its single delete happens here.
BottomLevelAccelerationStructurePool::~BottomLevelAccelerationStructurePool()
{
	delete m_impl;
}
1716 
batchStructCount(const deUint32 & value)1717 void BottomLevelAccelerationStructurePool::batchStructCount (const deUint32& value)
1718 {
1719 	DE_ASSERT(value >= 1); m_batchStructCount = value;
1720 }
1721 
// Registers a new structure in the pool and returns a shared handle to it.
// structureSize/deviceAddress are recorded for later buffer creation.
auto BottomLevelAccelerationStructurePool::add (VkDeviceSize		structureSize,
												VkDeviceAddress		deviceAddress) -> BottomLevelAccelerationStructurePool::BlasPtr
{
	// Prevent a programmer from calling this method after batchCreate(...) method has been called.
	if (m_createOnce) DE_ASSERT(0);

	// Ownership of the raw pointer passes to the shared handle stored in m_structs.
	auto blas = new BottomLevelAccelerationStructurePoolMember(*m_impl);
	m_infos.push_back({structureSize, deviceAddress});
	m_structs.emplace_back(blas);
	return m_structs.back();
}
1733 
// Computes, for each of the four buffer categories (0: acceleration structures,
// 1: build scratch, 2: vertices, 3: indices), the largest number of consecutive
// pool members whose summed sizes fit within maxBufferSize. The per-category
// results (at least 1) are written to 'result'.
void adjustBatchCount (const DeviceInterface&		vkd,
					   const VkDevice				device,
					   const std::vector<BottomLevelAccelerationStructurePool::BlasPtr>& structs,
					   const std::vector<BottomLevelAccelerationStructurePool::BlasInfo>& infos,
					   const VkDeviceSize			maxBufferSize,
					   deUint32						(&result)[4])
{
	tcu::Vector<VkDeviceSize, 4>	sizes(0);	// current member's size per category
	tcu::Vector<VkDeviceSize, 4>	sums(0);	// running size of the batch being filled
	tcu::Vector<deUint32, 4>		tmps(0);	// member count of the batch being filled
	tcu::Vector<deUint32, 4>		batches(0);	// largest batch observed so far

	VkDeviceSize	updateScratchSize = 0;	static_cast<void>(updateScratchSize);	// not used yet, disabled for future implementation

	// Grow the current batch in category c while it still fits; otherwise restart it.
	auto updateIf = [&](deUint32 c)
	{
		if (sums[c] + sizes[c] <= maxBufferSize)
		{
			sums[c] += sizes[c];
			tmps[c] += 1;

			batches[c] = std::max(tmps[c], batches[c]);
		}
		else
		{
			// NOTE(review): on overflow the batch restarts empty; the current member's
			// size is not carried into the new batch - confirm this is intended.
			sums[c] = 0;
			tmps[c] = 0;
		}
	};

	const deUint32	maxIter	= static_cast<deUint32>(structs.size());
	for (deUint32 i = 0; i < maxIter; ++i)
	{
		auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(structs[i].get());
		std::tie(sizes[0], updateScratchSize, sizes[1], sizes[2], sizes[3]) = str.computeBuildSize(vkd, device, infos[i].structureSize);

		updateIf(0);
		updateIf(1);
		updateIf(2);
		updateIf(3);
	}

	// Never report an empty batch.
	result[0] = std::max(batches[0], 1u);
	result[1] = std::max(batches[1], 1u);
	result[2] = std::max(batches[2], 1u);
	result[3] = std::max(batches[3], 1u);
}
1781 
getAllocationCount() const1782 size_t BottomLevelAccelerationStructurePool::getAllocationCount () const
1783 {
1784 	return m_impl->m_accellerationStructureBuffers.size()
1785 			+ m_impl->m_vertexBuffers.size()
1786 			+ m_impl->m_indexBuffers.size()
1787 			+ 1 /* for scratch buffer */;
1788 }
1789 
// Predicts how many separate buffer allocations the pool would need if its members
// were packed into batches under the given per-buffer size limit. Each batch of
// structures/scratch/vertices/indices becomes one buffer; the maps below only
// exist to count distinct batch indices per category.
size_t BottomLevelAccelerationStructurePool::getAllocationCount (const DeviceInterface&		vk,
																 const VkDevice				device,
																 const VkDeviceSize			maxBufferSize) const
{
	DE_ASSERT(m_structs.size() != 0);

	// batch index -> accumulated (aligned) size; only the entry count matters.
	std::map<deUint32, VkDeviceSize>	accStrSizes;
	std::map<deUint32, VkDeviceSize>	vertBuffSizes;
	std::map<deUint32, VkDeviceSize>	indexBuffSizes;
	std::map<deUint32, VkDeviceSize>	scratchBuffSizes;

	const deUint32	allStructsCount		= structCount();

	// Default batch counts; a geometry batch count of 0 falls back to the structure count.
	deUint32		batchStructCount	= m_batchStructCount;
	deUint32		batchScratchCount	= m_batchStructCount;
	deUint32		batchVertexCount	= m_batchGeomCount ? m_batchGeomCount : m_batchStructCount;
	deUint32		batchIndexCount		= batchVertexCount;

	// A maxBufferSize of ~0 (negz sentinel) means "no limit"; otherwise derive the
	// batch counts from the members' actual sizes.
	if (!isnegz(maxBufferSize))
	{
		deUint32	batches[4];
		adjustBatchCount(vk, device, m_structs, m_infos, maxBufferSize, batches);
		batchStructCount	= batches[0];
		batchScratchCount	= batches[1];
		batchVertexCount	= batches[2];
		batchIndexCount		= batches[3];
	}

	// Separate counters: scratch/vertex/index slots are only consumed by members
	// that actually need them (non-zero size).
	deUint32		iStr				= 0;
	deUint32		iScratch			= 0;
	deUint32		iVertex				= 0;
	deUint32		iIndex				= 0;

	VkDeviceSize	strSize				= 0;
	VkDeviceSize	updateScratchSize	= 0;
	VkDeviceSize	buildScratchSize	= 0;
	VkDeviceSize	vertexSize			= 0;
	VkDeviceSize	indexSize			= 0;

	for (; iStr < allStructsCount; ++iStr)
	{
		auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[iStr].get());
		std::tie(strSize, updateScratchSize, buildScratchSize, vertexSize, indexSize) = str.computeBuildSize(vk, device, m_infos[iStr].structureSize);

		{
			// Acceleration structure storage is 256-byte aligned within its batch buffer.
			const VkDeviceSize	alignedStrSize	= deAlign64(strSize, 256);
			const deUint32		accStrIndex		= (iStr / batchStructCount);
			accStrSizes[accStrIndex]	+= alignedStrSize;
		}

		if (buildScratchSize != 0)
		{
			const VkDeviceSize	alignedBuilsScratchSize	= deAlign64(buildScratchSize, 256);
			const deUint32		scratchBuffIndex		= (iScratch/ batchScratchCount);
			scratchBuffSizes[scratchBuffIndex]	+= alignedBuilsScratchSize;
			iScratch							+= 1;
		}

		if (vertexSize != 0)
		{
			// Vertex/index data is 8-byte aligned within its batch buffer.
			const VkDeviceSize	alignedVertBuffSize	= deAlign64(vertexSize, 8);
			const deUint32		vertBuffIndex		= (iVertex / batchVertexCount);
			vertBuffSizes[vertBuffIndex]	+= alignedVertBuffSize;
			iVertex							+= 1;
		}

		if (indexSize != 0)
		{
			const VkDeviceSize	alignedIndexBuffSize	= deAlign64(indexSize, 8);
			const deUint32		indexBuffIndex			= (iIndex / batchIndexCount);
			indexBuffSizes[indexBuffIndex]	+= alignedIndexBuffSize;
			iIndex							+= 1;
		}
	}

	// One allocation per distinct batch in each category.
	return accStrSizes.size()
			+ vertBuffSizes.size()
			+ indexBuffSizes.size()
			+ scratchBuffSizes.size();
}
1870 
getAllocationSizes(const DeviceInterface & vk,const VkDevice device) const1871 tcu::Vector<VkDeviceSize, 4> BottomLevelAccelerationStructurePool::getAllocationSizes (const DeviceInterface&		vk,
1872 																					   const VkDevice				device) const
1873 {
1874 	if (m_structsBuffSize)
1875 	{
1876 		return tcu::Vector<VkDeviceSize, 4>(m_structsBuffSize, m_buildsScratchSize, m_verticesSize, m_indicesSize);
1877 	}
1878 
1879 	VkDeviceSize strSize				= 0;
1880 	VkDeviceSize updateScratchSize		= 0;	static_cast<void>(updateScratchSize);		// not used yet, disabled for future implementation
1881 	VkDeviceSize buildScratchSize		= 0;
1882 	VkDeviceSize vertexSize				= 0;
1883 	VkDeviceSize indexSize				= 0;
1884 	VkDeviceSize sumStrSize				= 0;
1885 	VkDeviceSize sumUpdateScratchSize	= 0;	static_cast<void>(sumUpdateScratchSize);	// not used yet, disabled for future implementation
1886 	VkDeviceSize sumBuildScratchSize	= 0;
1887 	VkDeviceSize sumVertexSize			= 0;
1888 	VkDeviceSize sumIndexSize			= 0;
1889 	for (size_t i = 0; i < structCount(); ++i)
1890 	{
1891 		auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[i].get());
1892 		std::tie(strSize, updateScratchSize, buildScratchSize, vertexSize, indexSize) = str.computeBuildSize(vk, device, m_infos[i].structureSize);
1893 		sumStrSize				+= deAlign64(strSize, 256);
1894 		//sumUpdateScratchSize	+= deAlign64(updateScratchSize, 256);	not used yet, disabled for future implementation
1895 		sumBuildScratchSize		+= deAlign64(buildScratchSize, 256);
1896 		sumVertexSize			+= deAlign64(vertexSize, 8);
1897 		sumIndexSize			+= deAlign64(indexSize, 8);
1898 	}
1899 	return tcu::Vector<VkDeviceSize, 4>(sumStrSize, sumBuildScratchSize, sumVertexSize, sumIndexSize);
1900 }
1901 
void BottomLevelAccelerationStructurePool::batchCreate (const DeviceInterface&		vkd,
														const VkDevice				device,
														Allocator&					allocator)
{
	// Convenience wrapper: delegate with the negative-zero sentinel, i.e. no
	// upper bound on the size of any single pooled buffer.
	batchCreateAdjust(vkd, device, allocator, negz<VkDeviceSize>(0));
}
1908 
void BottomLevelAccelerationStructurePool::batchCreateAdjust (const DeviceInterface&	vkd,
															  const VkDevice			device,
															  Allocator&				allocator,
															  const VkDeviceSize		maxBufferSize)
{
	// Prevent a programmer from calling this method more than once.
	if (m_createOnce) DE_ASSERT(0);

	m_createOnce = true;
	DE_ASSERT(m_structs.size() != 0);

	// Allocates one pooled acceleration-structure buffer, preferring cached
	// host-visible memory when m_tryCachedMemory is set and falling back to
	// plain host-visible memory if that combination is unsupported.
	auto createAccellerationStructureBuffer = [&](VkDeviceSize bufferSize) -> typename std::add_pointer<BufferWithMemory>::type
	{
		BufferWithMemory* res = nullptr;
		const VkBufferCreateInfo bci = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);

		if (m_tryCachedMemory) try
		{
			res = new BufferWithMemory(vkd, device, allocator, bci, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
		}
		catch (const tcu::NotSupportedError&)
		{
			res = nullptr;
		}

		return (nullptr != res)
				? res
				: (new BufferWithMemory(vkd, device, allocator, bci, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
	};

	// Creates the single device-side scratch buffer shared by all device builds.
	auto createDeviceScratchBuffer = [&](VkDeviceSize bufferSize) -> de::SharedPtr<BufferWithMemory>
	{
		const VkBufferCreateInfo bci = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
		BufferWithMemory* p = new BufferWithMemory(vkd, device, allocator, bci, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
		return de::SharedPtr<BufferWithMemory>(p);
	};

	// Batch-index -> total byte size of the pooled buffer backing that batch.
	std::map<deUint32, VkDeviceSize>	accStrSizes;
	std::map<deUint32, VkDeviceSize>	vertBuffSizes;
	std::map<deUint32, VkDeviceSize>	indexBuffSizes;

	const deUint32	allStructsCount		= structCount();
	deUint32		iterKey				= 0;

	// Default batch capacities; geometry batches fall back to the structure
	// batch count when no dedicated geometry batch count was configured.
	deUint32		batchStructCount	= m_batchStructCount;
	deUint32		batchVertexCount	= m_batchGeomCount ? m_batchGeomCount : m_batchStructCount;
	deUint32		batchIndexCount		= batchVertexCount;

	// A real (non-sentinel) maxBufferSize shrinks the batch counts so that no
	// single pooled buffer would exceed that size.
	if (!isnegz(maxBufferSize))
	{
		deUint32	batches[4];
		adjustBatchCount(vkd, device, m_structs, m_infos, maxBufferSize, batches);
		batchStructCount	= batches[0];
		// batches[1]: batchScratchCount
		batchVertexCount	= batches[2];
		batchIndexCount		= batches[3];
	}

	// Running element counters used to derive batch indices; vertex/index
	// counters advance only for structures that actually carry geometry data.
	deUint32		iStr				= 0;
	deUint32		iVertex				= 0;
	deUint32		iIndex				= 0;

	// Per-structure sizes from computeBuildSize() and running byte offsets
	// within the current batch buffers.
	VkDeviceSize	strSize				= 0;
	VkDeviceSize	updateScratchSize	= 0;
	VkDeviceSize	buildScratchSize	= 0;
	VkDeviceSize	maxBuildScratchSize	= 0;
	VkDeviceSize	vertexSize			= 0;
	VkDeviceSize	indexSize			= 0;

	VkDeviceSize	strOffset			= 0;
	VkDeviceSize	vertexOffset		= 0;
	VkDeviceSize	indexOffset			= 0;

	deUint32		hostStructCount		= 0;
	deUint32		deviceStructCount	= 0;

	// First pass: compute each structure's batch index and byte offset within
	// each pooled buffer and accumulate the total size needed per batch.
	for (; iStr < allStructsCount; ++iStr)
	{
		BottomLevelAccelerationStructurePoolMember::Info info{};
		auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[iStr].get());
		std::tie(strSize, updateScratchSize, buildScratchSize, vertexSize, indexSize) = str.computeBuildSize(vkd, device, m_infos[iStr].structureSize);

		++(str.getBuildType() == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR ? hostStructCount : deviceStructCount);

		{
			const VkDeviceSize	alignedStrSize	= deAlign64(strSize, 256);
			const deUint32		accStrIndex		= (iStr / batchStructCount);
			// Starting a new batch: the offset restarts at the beginning of its buffer.
			if (iStr != 0 && (iStr % batchStructCount) == 0)
			{
				strOffset				= 0;
			}

			info.accStrIndex			= accStrIndex;
			info.accStrOffset			= strOffset;
			accStrSizes[accStrIndex]	+= alignedStrSize;
			strOffset					+= alignedStrSize;
			m_structsBuffSize			+= alignedStrSize;
		}

		// Scratch space is shared between builds: only the maximum aligned
		// scratch size is tracked, and every structure builds at offset 0 of
		// scratch buffer 0.
		if (buildScratchSize != 0)
		{
			maxBuildScratchSize = std::max(maxBuildScratchSize, make_unsigned(deAlign64(buildScratchSize, 256u)));

			info.buildScratchBuffIndex		= 0;
			info.buildScratchBuffOffset		= 0;
		}

		if (vertexSize != 0)
		{
			const VkDeviceSize	alignedVertBuffSize	= deAlign64(vertexSize, 8);
			const deUint32		vertBuffIndex		= (iVertex / batchVertexCount);
			if (iVertex != 0 && (iVertex % batchVertexCount) == 0)
			{
				vertexOffset				= 0;
			}

			info.vertBuffIndex				= vertBuffIndex;
			info.vertBuffOffset				= vertexOffset;
			vertBuffSizes[vertBuffIndex]	+= alignedVertBuffSize;
			vertexOffset					+= alignedVertBuffSize;
			m_verticesSize					+= alignedVertBuffSize;
			iVertex							+= 1;
		}

		if (indexSize != 0)
		{
			const VkDeviceSize	alignedIndexBuffSize	= deAlign64(indexSize, 8);
			const deUint32		indexBuffIndex			= (iIndex / batchIndexCount);
			if (iIndex != 0 && (iIndex % batchIndexCount) == 0)
			{
				indexOffset					= 0;
			}

			info.indexBuffIndex				= indexBuffIndex;
			info.indexBuffOffset			= indexOffset;
			indexBuffSizes[indexBuffIndex]	+= alignedIndexBuffSize;
			indexOffset						+= alignedIndexBuffSize;
			m_indicesSize					+= alignedIndexBuffSize;
			iIndex							+= 1;
		}

		// Record the computed placement and sizes on the pool member itself.
		str.preCreateSetSizesAndOffsets(info, strSize, updateScratchSize, buildScratchSize);
	}

	// Second pass: allocate one buffer per batch of each kind.
	for (iterKey = 0; iterKey < static_cast<deUint32>(accStrSizes.size()); ++iterKey)
	{
		m_impl->m_accellerationStructureBuffers.emplace_back(createAccellerationStructureBuffer(accStrSizes.at(iterKey)));
	}
	for (iterKey = 0; iterKey < static_cast<deUint32>(vertBuffSizes.size()); ++iterKey)
	{
		m_impl->m_vertexBuffers.emplace_back(createVertexBuffer(vkd, device, allocator, vertBuffSizes.at(iterKey)));
	}
	for (iterKey = 0; iterKey < static_cast<deUint32>(indexBuffSizes.size()); ++iterKey)
	{
		m_impl->m_indexBuffers.emplace_back(createIndexBuffer(vkd, device, allocator, indexBuffSizes.at(iterKey)));
	}

	// The scratch buffers are sized for the single largest build and reused for
	// every structure; host and device scratch are maintained separately.
	if (maxBuildScratchSize)
	{
		if (hostStructCount)	m_impl->m_hostScratchBuffer->resize(static_cast<size_t>(maxBuildScratchSize));
		if (deviceStructCount)	m_impl->m_deviceScratchBuffer = createDeviceScratchBuffer(maxBuildScratchSize);

		m_buildsScratchSize = maxBuildScratchSize;
	}

	// Finally create the VkAccelerationStructureKHR objects now that their
	// backing buffers and offsets are known.
	for (iterKey = 0; iterKey < allStructsCount; ++iterKey)
	{
		auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[iterKey].get());
		str.createAccellerationStructure(vkd, device, m_infos[iterKey].deviceAddress);
	}
}
2080 
batchBuild(const DeviceInterface & vk,const VkDevice device,VkCommandBuffer cmdBuffer)2081 void BottomLevelAccelerationStructurePool::batchBuild (const DeviceInterface&	vk,
2082 													   const VkDevice			device,
2083 													   VkCommandBuffer			cmdBuffer)
2084 {
2085 	for (const auto& str : m_structs)
2086 	{
2087 		str->build(vk, device, cmdBuffer);
2088 	}
2089 }
2090 
batchBuild(const DeviceInterface & vk,const VkDevice device,VkCommandPool cmdPool,VkQueue queue)2091 void BottomLevelAccelerationStructurePool::batchBuild (const DeviceInterface&	vk,
2092 													   const VkDevice			device,
2093 													   VkCommandPool			cmdPool,
2094 													   VkQueue					queue)
2095 {
2096 	const deUint32			limit	= 10000u;
2097 	const deUint32			count	= structCount();
2098 	std::vector<BlasPtr>	buildingOnDevice;
2099 
2100 	auto buildOnDevice = [&]() -> void
2101 	{
2102 		Move<VkCommandBuffer>	cmd = allocateCommandBuffer(vk, device, cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2103 
2104 		beginCommandBuffer(vk, *cmd, 0u);
2105 			for (const auto& str : buildingOnDevice)
2106 				str->build(vk, device, *cmd);
2107 		endCommandBuffer(vk, *cmd);
2108 
2109 		submitCommandsAndWait(vk, device, queue, *cmd);
2110 		vk.resetCommandPool(device, cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
2111 	};
2112 
2113 	buildingOnDevice.reserve(limit);
2114 	for (deUint32 i = 0; i < count; ++i)
2115 	{
2116 		auto str = m_structs[i];
2117 
2118 		if (str->getBuildType() == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR)
2119 			str->build(vk, device, DE_NULL);
2120 		else
2121 			buildingOnDevice.emplace_back(str);
2122 
2123 		if ( buildingOnDevice.size() == limit || (count - 1) == i)
2124 		{
2125 			buildOnDevice();
2126 			buildingOnDevice.clear();
2127 		}
2128 	}
2129 }
2130 
auto BottomLevelAccelerationStructurePoolMember::computeBuildSize (const DeviceInterface&	vk,
																   const VkDevice			device,
																   const VkDeviceSize		strSize) const
																   //              accStrSize,updateScratch,buildScratch, vertexSize, indexSize
																   -> std::tuple<VkDeviceSize, VkDeviceSize,VkDeviceSize,VkDeviceSize,VkDeviceSize>
{
	// Exactly one of the two inputs may be provided: geometry data (sizes are
	// then queried from the driver) or an explicit, pre-known structure size.
	DE_ASSERT(!m_geometriesData.empty() !=  !(strSize == 0)); // logical xor

	// Default result for the explicit-size case: only the aligned structure
	// size is known, everything else stays zero.
	std::tuple<VkDeviceSize,VkDeviceSize,VkDeviceSize,VkDeviceSize,VkDeviceSize> result(deAlign64(strSize, 256), 0, 0, 0, 0);

	if (!m_geometriesData.empty())
	{
		// Assemble the same geometry description a real build would use so that
		// vkGetAccelerationStructureBuildSizesKHR reports matching sizes.
		std::vector<VkAccelerationStructureGeometryKHR>			accelerationStructureGeometriesKHR;
		std::vector<VkAccelerationStructureGeometryKHR*>		accelerationStructureGeometriesKHRPointers;
		std::vector<VkAccelerationStructureBuildRangeInfoKHR>	accelerationStructureBuildRangeInfoKHR;
		std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT> accelerationStructureGeometryMicromapsEXT;
		std::vector<deUint32>									maxPrimitiveCounts;
		prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers, accelerationStructureBuildRangeInfoKHR, accelerationStructureGeometryMicromapsEXT, maxPrimitiveCounts);

		const VkAccelerationStructureGeometryKHR*				accelerationStructureGeometriesKHRPointer	= accelerationStructureGeometriesKHR.data();
		const VkAccelerationStructureGeometryKHR* const*		accelerationStructureGeometry				= accelerationStructureGeometriesKHRPointers.data();

		// Either pGeometries or ppGeometries must be set, never both,
		// depending on the m_useArrayOfPointers test configuration.
		VkAccelerationStructureBuildGeometryInfoKHR	accelerationStructureBuildGeometryInfoKHR	=
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,			//  VkStructureType										sType;
			DE_NULL,																	//  const void*											pNext;
			VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,							//  VkAccelerationStructureTypeKHR						type;
			m_buildFlags,																//  VkBuildAccelerationStructureFlagsKHR				flags;
			VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,								//  VkBuildAccelerationStructureModeKHR					mode;
			DE_NULL,																	//  VkAccelerationStructureKHR							srcAccelerationStructure;
			DE_NULL,																	//  VkAccelerationStructureKHR							dstAccelerationStructure;
			static_cast<deUint32>(accelerationStructureGeometriesKHR.size()),			//  deUint32											geometryCount;
			m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer,	//  const VkAccelerationStructureGeometryKHR*			pGeometries;
			m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL,				//  const VkAccelerationStructureGeometryKHR* const*	ppGeometries;
			makeDeviceOrHostAddressKHR(DE_NULL)											//  VkDeviceOrHostAddressKHR							scratchData;
		};

		VkAccelerationStructureBuildSizesInfoKHR sizeInfo =
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,	//  VkStructureType	sType;
			DE_NULL,														//  const void*		pNext;
			0,																//  VkDeviceSize	accelerationStructureSize;
			0,																//  VkDeviceSize	updateScratchSize;
			0																//  VkDeviceSize	buildScratchSize;
		};

		vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);

		// Driver-reported sizes plus the vertex/index footprint of the geometry.
		std::get<0>(result) = sizeInfo.accelerationStructureSize;
		std::get<1>(result) = sizeInfo.updateScratchSize;
		std::get<2>(result) = sizeInfo.buildScratchSize;
		std::get<3>(result) = getVertexBufferSize(m_geometriesData);
		std::get<4>(result) = getIndexBufferSize(m_geometriesData);
	}

	return result;
}
2188 
preCreateSetSizesAndOffsets(const Info & info,const VkDeviceSize accStrSize,const VkDeviceSize updateScratchSize,const VkDeviceSize buildScratchSize)2189 void BottomLevelAccelerationStructurePoolMember::preCreateSetSizesAndOffsets (const Info&			info,
2190 																			  const VkDeviceSize	accStrSize,
2191 																			  const VkDeviceSize	updateScratchSize,
2192 																			  const VkDeviceSize	buildScratchSize)
2193 {
2194 	m_info				= info;
2195 	m_structureSize		= accStrSize;
2196 	m_updateScratchSize	= updateScratchSize;
2197 	m_buildScratchSize	= buildScratchSize;
2198 }
2199 
void BottomLevelAccelerationStructurePoolMember::createAccellerationStructure (const DeviceInterface&	vk,
																			   const VkDevice			device,
																			   VkDeviceAddress			deviceAddress)
{
	// The GENERIC type is used when the test exercises generic-structure
	// creation; otherwise a plain bottom-level structure is created.
	const VkAccelerationStructureTypeKHR		structureType						= (m_createGeneric
																					   ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
																					   : VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
	// Buffer, offset and size come from the pool placement computed earlier
	// (see preCreateSetSizesAndOffsets).
	const VkAccelerationStructureCreateInfoKHR	accelerationStructureCreateInfoKHR
	{
		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,						//  VkStructureType											sType;
		DE_NULL,																		//  const void*												pNext;
		m_createFlags,																	//  VkAccelerationStructureCreateFlagsKHR					createFlags;
		getAccelerationStructureBuffer()->get(),										//  VkBuffer												buffer;
		getAccelerationStructureBufferOffset(),											//  VkDeviceSize											offset;
		m_structureSize,																//  VkDeviceSize											size;
		structureType,																	//  VkAccelerationStructureTypeKHR							type;
		deviceAddress																	//  VkDeviceAddress											deviceAddress;
	};

	m_accelerationStructureKHR	= createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);
}
2221 
TopLevelAccelerationStructure::~TopLevelAccelerationStructure ()
{
	// Intentionally empty; defined out of line in this translation unit.
}
2225 
TopLevelAccelerationStructure::TopLevelAccelerationStructure ()
	: m_structureSize		(0u)
	, m_updateScratchSize	(0u)
	, m_buildScratchSize	(0u)
{
	// Sizes start at zero; they are reported via getStructureBuildSizes() and
	// are presumably populated during create/build in derived classes — the
	// setters are outside this translation unit's view.
}
2232 
setInstanceCount(const size_t instanceCount)2233 void TopLevelAccelerationStructure::setInstanceCount (const size_t instanceCount)
2234 {
2235 	m_bottomLevelInstances.reserve(instanceCount);
2236 	m_instanceData.reserve(instanceCount);
2237 }
2238 
addInstance(de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelStructure,const VkTransformMatrixKHR & matrix,deUint32 instanceCustomIndex,deUint32 mask,deUint32 instanceShaderBindingTableRecordOffset,VkGeometryInstanceFlagsKHR flags)2239 void TopLevelAccelerationStructure::addInstance (de::SharedPtr<BottomLevelAccelerationStructure>	bottomLevelStructure,
2240 												 const VkTransformMatrixKHR&						matrix,
2241 												 deUint32											instanceCustomIndex,
2242 												 deUint32											mask,
2243 												 deUint32											instanceShaderBindingTableRecordOffset,
2244 												 VkGeometryInstanceFlagsKHR							flags)
2245 {
2246 	m_bottomLevelInstances.push_back(bottomLevelStructure);
2247 	m_instanceData.push_back(InstanceData(matrix, instanceCustomIndex, mask, instanceShaderBindingTableRecordOffset, flags));
2248 }
2249 
getStructureBuildSizes() const2250 VkAccelerationStructureBuildSizesInfoKHR TopLevelAccelerationStructure::getStructureBuildSizes () const
2251 {
2252 	return
2253 	{
2254 		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,	//  VkStructureType	sType;
2255 		DE_NULL,														//  const void*		pNext;
2256 		m_structureSize,												//  VkDeviceSize	accelerationStructureSize;
2257 		m_updateScratchSize,											//  VkDeviceSize	updateScratchSize;
2258 		m_buildScratchSize												//  VkDeviceSize	buildScratchSize;
2259 	};
2260 }
2261 
void TopLevelAccelerationStructure::createAndBuild (const DeviceInterface&	vk,
													const VkDevice			device,
													const VkCommandBuffer	cmdBuffer,
													Allocator&				allocator,
													VkDeviceAddress			deviceAddress)
{
	// Convenience wrapper: create with structureSize 0 (size determined by the
	// implementation from the instance data) and immediately record the build.
	create(vk, device, allocator, 0u, deviceAddress);
	build(vk, device, cmdBuffer);
}
2271 
void TopLevelAccelerationStructure::createAndCopyFrom (const DeviceInterface&				vk,
													   const VkDevice						device,
													   const VkCommandBuffer				cmdBuffer,
													   Allocator&							allocator,
													   TopLevelAccelerationStructure*		accelerationStructure,
													   VkDeviceSize							compactCopySize,
													   VkDeviceAddress						deviceAddress)
{
	DE_ASSERT(accelerationStructure != NULL);
	// compactCopySize > 0 selects a compacting copy of that size; otherwise the
	// source structure's full build size is used for a plain clone.
	VkDeviceSize copiedSize = compactCopySize > 0u ? compactCopySize : accelerationStructure->getStructureBuildSizes().accelerationStructureSize;
	DE_ASSERT(copiedSize != 0u);

	create(vk, device, allocator, copiedSize, deviceAddress);
	copyFrom(vk, device, cmdBuffer, accelerationStructure, compactCopySize > 0u);
}
2287 
void TopLevelAccelerationStructure::createAndDeserializeFrom (const DeviceInterface&					vk,
															  const VkDevice							device,
															  const VkCommandBuffer						cmdBuffer,
															  Allocator&								allocator,
															  SerialStorage*							storage,
															  VkDeviceAddress							deviceAddress)
{
	DE_ASSERT(storage != NULL);
	DE_ASSERT(storage->getStorageSize() >= SerialStorage::SERIAL_STORAGE_SIZE_MIN);
	// Size the structure from the serialized blob, restore any bottom-level
	// structures first when the storage uses the deep format, then deserialize
	// the top-level structure itself.
	create(vk, device, allocator, storage->getDeserializedSize(), deviceAddress);
	if (storage->hasDeepFormat()) createAndDeserializeBottoms(vk, device, cmdBuffer, allocator, storage);
	deserialize(vk, device, cmdBuffer, storage);
}
2301 
createInstanceBuffer(const DeviceInterface & vk,const VkDevice device,Allocator & allocator,std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> bottomLevelInstances,std::vector<InstanceData> instanceData,const bool tryCachedMemory)2302 BufferWithMemory* createInstanceBuffer (const DeviceInterface&											vk,
2303 										const VkDevice													device,
2304 										Allocator&														allocator,
2305 										std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >	bottomLevelInstances,
2306 										std::vector<InstanceData>										instanceData,
2307 										const bool														tryCachedMemory)
2308 {
2309 	DE_ASSERT(bottomLevelInstances.size() != 0);
2310 	DE_ASSERT(bottomLevelInstances.size() == instanceData.size());
2311 	DE_UNREF(instanceData);
2312 
2313 	BufferWithMemory*			result				= nullptr;
2314 	const VkDeviceSize			bufferSizeBytes		= bottomLevelInstances.size() * sizeof(VkAccelerationStructureInstanceKHR);
2315 	const VkBufferCreateInfo	bufferCreateInfo	= makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
2316 	if (tryCachedMemory) try
2317 	{
2318 		result = new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
2319 	}
2320 	catch (const tcu::NotSupportedError&)
2321 	{
2322 		result = nullptr;
2323 	}
2324 	return result
2325 			? result
2326 			: new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
2327 }
2328 
updateSingleInstance(const DeviceInterface & vk,const VkDevice device,const BottomLevelAccelerationStructure & bottomLevelAccelerationStructure,const InstanceData & instanceData,deUint8 * bufferLocation,VkAccelerationStructureBuildTypeKHR buildType,bool inactiveInstances)2329 void updateSingleInstance (const DeviceInterface&					vk,
2330 						   const VkDevice							device,
2331 						   const BottomLevelAccelerationStructure&	bottomLevelAccelerationStructure,
2332 						   const InstanceData&						instanceData,
2333 						   deUint8*									bufferLocation,
2334 						   VkAccelerationStructureBuildTypeKHR		buildType,
2335 						   bool										inactiveInstances)
2336 {
2337 	const VkAccelerationStructureKHR accelerationStructureKHR = *bottomLevelAccelerationStructure.getPtr();
2338 
2339 	// This part needs to be fixed once a new version of the VkAccelerationStructureInstanceKHR will be added to vkStructTypes.inl
2340 	VkDeviceAddress accelerationStructureAddress;
2341 	if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2342 	{
2343 		VkAccelerationStructureDeviceAddressInfoKHR asDeviceAddressInfo =
2344 		{
2345 			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,	// VkStructureType				sType;
2346 			DE_NULL,															// const void*					pNext;
2347 			accelerationStructureKHR											// VkAccelerationStructureKHR	accelerationStructure;
2348 		};
2349 		accelerationStructureAddress = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
2350 	}
2351 
2352 	deUint64 structureReference;
2353 	if (inactiveInstances)
2354 	{
2355 		// Instances will be marked inactive by making their references VK_NULL_HANDLE or having address zero.
2356 		structureReference = 0ull;
2357 	}
2358 	else
2359 	{
2360 		structureReference	= (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2361 							? deUint64(accelerationStructureAddress)
2362 							: deUint64(accelerationStructureKHR.getInternal());
2363 	}
2364 
2365 	VkAccelerationStructureInstanceKHR	accelerationStructureInstanceKHR = makeVkAccelerationStructureInstanceKHR
2366 	(
2367 		instanceData.matrix,									//  VkTransformMatrixKHR		transform;
2368 		instanceData.instanceCustomIndex,						//  deUint32					instanceCustomIndex:24;
2369 		instanceData.mask,										//  deUint32					mask:8;
2370 		instanceData.instanceShaderBindingTableRecordOffset,	//  deUint32					instanceShaderBindingTableRecordOffset:24;
2371 		instanceData.flags,										//  VkGeometryInstanceFlagsKHR	flags:8;
2372 		structureReference										//  deUint64					accelerationStructureReference;
2373 	);
2374 
2375 	deMemcpy(bufferLocation, &accelerationStructureInstanceKHR, sizeof(VkAccelerationStructureInstanceKHR));
2376 }
2377 
updateInstanceBuffer(const DeviceInterface & vk,const VkDevice device,const std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> & bottomLevelInstances,const std::vector<InstanceData> & instanceData,const BufferWithMemory * instanceBuffer,VkAccelerationStructureBuildTypeKHR buildType,bool inactiveInstances)2378 void updateInstanceBuffer (const DeviceInterface&												vk,
2379 						   const VkDevice														device,
2380 						   const std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>&	bottomLevelInstances,
2381 						   const std::vector<InstanceData>&										instanceData,
2382 						   const BufferWithMemory*												instanceBuffer,
2383 						   VkAccelerationStructureBuildTypeKHR									buildType,
2384 						   bool																	inactiveInstances)
2385 {
2386 	DE_ASSERT(bottomLevelInstances.size() != 0);
2387 	DE_ASSERT(bottomLevelInstances.size() == instanceData.size());
2388 
2389 	auto&			instancesAlloc		= instanceBuffer->getAllocation();
2390 	auto			bufferStart			= reinterpret_cast<deUint8*>(instancesAlloc.getHostPtr());
2391 	VkDeviceSize	bufferOffset		= 0ull;
2392 
2393 	for (size_t instanceNdx = 0; instanceNdx < bottomLevelInstances.size(); ++instanceNdx)
2394 	{
2395 		const auto& blas = *bottomLevelInstances[instanceNdx];
2396 		updateSingleInstance(vk, device, blas, instanceData[instanceNdx], bufferStart + bufferOffset, buildType, inactiveInstances);
2397 		bufferOffset += sizeof(VkAccelerationStructureInstanceKHR);
2398 	}
2399 
2400 	flushMappedMemoryRange(vk, device, instancesAlloc.getMemory(), instancesAlloc.getOffset(), VK_WHOLE_SIZE);
2401 }
2402 
2403 class TopLevelAccelerationStructureKHR : public TopLevelAccelerationStructure
2404 {
2405 public:
2406 	static deUint32											getRequiredAllocationCount							(void);
2407 
2408 															TopLevelAccelerationStructureKHR					();
2409 															TopLevelAccelerationStructureKHR					(const TopLevelAccelerationStructureKHR&		other) = delete;
2410 	virtual													~TopLevelAccelerationStructureKHR					();
2411 
2412 	void													setBuildType										(const VkAccelerationStructureBuildTypeKHR		buildType) override;
2413 	void													setCreateFlags										(const VkAccelerationStructureCreateFlagsKHR	createFlags) override;
2414 	void													setCreateGeneric									(bool											createGeneric) override;
2415 	void													setBuildFlags										(const VkBuildAccelerationStructureFlagsKHR		buildFlags) override;
2416 	void													setBuildWithoutPrimitives							(bool											buildWithoutPrimitives) override;
2417 	void													setInactiveInstances								(bool											inactiveInstances) override;
2418 	void													setDeferredOperation								(const bool										deferredOperation,
2419 																												 const deUint32									workerThreadCount) override;
2420 	void													setUseArrayOfPointers								(const bool										useArrayOfPointers) override;
2421 	void													setIndirectBuildParameters							(const VkBuffer									indirectBuffer,
2422 																												 const VkDeviceSize								indirectBufferOffset,
2423 																												 const deUint32									indirectBufferStride) override;
2424 	void													setUsePPGeometries									(const bool										usePPGeometries) override;
2425 	void													setTryCachedMemory									(const bool										tryCachedMemory) override;
2426 	VkBuildAccelerationStructureFlagsKHR					getBuildFlags										() const override;
2427 
2428 	void													getCreationSizes									(const DeviceInterface&							vk,
2429 																												 const VkDevice									device,
2430 																												 const VkDeviceSize								structureSize,
2431 																												 CreationSizes&									sizes) override;
2432 	void													create												(const DeviceInterface&							vk,
2433 																												 const VkDevice									device,
2434 																												 Allocator&										allocator,
2435 																												 VkDeviceSize									structureSize,
2436 																												 VkDeviceAddress								deviceAddress			= 0u,
2437 																												 const void*									pNext					= DE_NULL,
2438 																												 const MemoryRequirement&						addMemoryRequirement	= MemoryRequirement::Any) override;
2439 	void													build												(const DeviceInterface&							vk,
2440 																												 const VkDevice									device,
2441 																												 const VkCommandBuffer							cmdBuffer) override;
2442 	void													copyFrom											(const DeviceInterface&							vk,
2443 																												 const VkDevice									device,
2444 																												 const VkCommandBuffer							cmdBuffer,
2445 																												 TopLevelAccelerationStructure*					accelerationStructure,
2446 																												 bool											compactCopy) override;
2447 	void													serialize											(const DeviceInterface&							vk,
2448 																												 const VkDevice									device,
2449 																												 const VkCommandBuffer							cmdBuffer,
2450 																												 SerialStorage*									storage) override;
2451 	void													deserialize											(const DeviceInterface&							vk,
2452 																												 const VkDevice									device,
2453 																												 const VkCommandBuffer							cmdBuffer,
2454 																												 SerialStorage*									storage) override;
2455 
2456 	std::vector<VkDeviceSize>								getSerializingSizes									(const DeviceInterface&							vk,
2457 																												 const VkDevice									device,
2458 																												 const VkQueue									queue,
2459 																												 const deUint32									queueFamilyIndex) override;
2460 
2461 	std::vector<deUint64>									getSerializingAddresses								(const DeviceInterface&							vk,
2462 																												 const VkDevice									device) const override;
2463 
2464 
2465 	const VkAccelerationStructureKHR*						getPtr												(void) const override;
2466 
2467 	void													updateInstanceMatrix								(const DeviceInterface&							vk,
2468 																												 const VkDevice									device,
2469 																												 size_t											instanceIndex,
2470 																												 const VkTransformMatrixKHR&					matrix) override;
2471 
2472 protected:
2473 	VkAccelerationStructureBuildTypeKHR						m_buildType;
2474 	VkAccelerationStructureCreateFlagsKHR					m_createFlags;
2475 	bool													m_createGeneric;
2476 	VkBuildAccelerationStructureFlagsKHR					m_buildFlags;
2477 	bool													m_buildWithoutPrimitives;
2478 	bool													m_inactiveInstances;
2479 	bool													m_deferredOperation;
2480 	deUint32												m_workerThreadCount;
2481 	bool													m_useArrayOfPointers;
2482 	de::MovePtr<BufferWithMemory>							m_accelerationStructureBuffer;
2483 	de::MovePtr<BufferWithMemory>							m_instanceBuffer;
2484 	de::MovePtr<BufferWithMemory>							m_instanceAddressBuffer;
2485 	de::MovePtr<BufferWithMemory>							m_deviceScratchBuffer;
2486 	std::vector<deUint8>									m_hostScratchBuffer;
2487 	Move<VkAccelerationStructureKHR>						m_accelerationStructureKHR;
2488 	VkBuffer												m_indirectBuffer;
2489 	VkDeviceSize											m_indirectBufferOffset;
2490 	deUint32												m_indirectBufferStride;
2491 	bool													m_usePPGeometries;
2492 	bool													m_tryCachedMemory;
2493 
2494 
2495 	void													prepareInstances									(const DeviceInterface&							vk,
2496 																												 const VkDevice									device,
2497 																												 VkAccelerationStructureGeometryKHR&			accelerationStructureGeometryKHR,
2498 																												 std::vector<deUint32>&							maxPrimitiveCounts);
2499 
2500 	void													serializeBottoms									(const DeviceInterface&							vk,
2501 																												 const VkDevice									device,
2502 																												 const VkCommandBuffer							cmdBuffer,
2503 																												 SerialStorage*									storage,
2504 																												 VkDeferredOperationKHR							deferredOperation);
2505 
2506 	void													createAndDeserializeBottoms							(const DeviceInterface&							vk,
2507 																												 const VkDevice									device,
2508 																												 const VkCommandBuffer							cmdBuffer,
2509 																												 Allocator&										allocator,
2510 																												 SerialStorage*									storage) override;
2511 };
2512 
getRequiredAllocationCount(void)2513 deUint32 TopLevelAccelerationStructureKHR::getRequiredAllocationCount (void)
2514 {
2515 	/*
2516 		de::MovePtr<BufferWithMemory>							m_instanceBuffer;
2517 		de::MovePtr<Allocation>									m_accelerationStructureAlloc;
2518 		de::MovePtr<BufferWithMemory>							m_deviceScratchBuffer;
2519 	*/
2520 	return 3u;
2521 }
2522 
// Default state: device build, no create/build flags, all optional modes
// (inactive instances, deferred operation, array-of-pointers, ppGeometries,
// indirect build) disabled. Cached memory for the instance buffer is tried
// by default (m_tryCachedMemory is forwarded to createInstanceBuffer in
// create()). All buffers start empty and are allocated lazily in create().
TopLevelAccelerationStructureKHR::TopLevelAccelerationStructureKHR ()
	: TopLevelAccelerationStructure	()
	, m_buildType					(VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	, m_createFlags					(0u)
	, m_createGeneric				(false)
	, m_buildFlags					(0u)
	, m_buildWithoutPrimitives		(false)
	, m_inactiveInstances			(false)
	, m_deferredOperation			(false)
	, m_workerThreadCount			(0)
	, m_useArrayOfPointers			(false)
	, m_accelerationStructureBuffer	(DE_NULL)
	, m_instanceBuffer				(DE_NULL)
	, m_instanceAddressBuffer		(DE_NULL)
	, m_deviceScratchBuffer			(DE_NULL)
	, m_accelerationStructureKHR	()
	, m_indirectBuffer				(DE_NULL)
	, m_indirectBufferOffset		(0)
	, m_indirectBufferStride		(0)
	, m_usePPGeometries				(false)
	, m_tryCachedMemory				(true)
{
}
2546 
// Trivial destructor: all owned resources live in Move<> / de::MovePtr<>
// members and are released automatically in reverse declaration order.
TopLevelAccelerationStructureKHR::~TopLevelAccelerationStructureKHR ()
{
}
2550 
setBuildType(const VkAccelerationStructureBuildTypeKHR buildType)2551 void TopLevelAccelerationStructureKHR::setBuildType (const VkAccelerationStructureBuildTypeKHR	buildType)
2552 {
2553 	m_buildType = buildType;
2554 }
2555 
setCreateFlags(const VkAccelerationStructureCreateFlagsKHR createFlags)2556 void TopLevelAccelerationStructureKHR::setCreateFlags (const VkAccelerationStructureCreateFlagsKHR	createFlags)
2557 {
2558 	m_createFlags = createFlags;
2559 }
2560 
setCreateGeneric(bool createGeneric)2561 void TopLevelAccelerationStructureKHR::setCreateGeneric (bool createGeneric)
2562 {
2563 	m_createGeneric = createGeneric;
2564 }
2565 
setInactiveInstances(bool inactiveInstances)2566 void TopLevelAccelerationStructureKHR::setInactiveInstances (bool inactiveInstances)
2567 {
2568 	m_inactiveInstances = inactiveInstances;
2569 }
2570 
setBuildFlags(const VkBuildAccelerationStructureFlagsKHR buildFlags)2571 void TopLevelAccelerationStructureKHR::setBuildFlags (const VkBuildAccelerationStructureFlagsKHR	buildFlags)
2572 {
2573 	m_buildFlags = buildFlags;
2574 }
2575 
setBuildWithoutPrimitives(bool buildWithoutPrimitives)2576 void TopLevelAccelerationStructureKHR::setBuildWithoutPrimitives (bool buildWithoutPrimitives)
2577 {
2578 	m_buildWithoutPrimitives = buildWithoutPrimitives;
2579 }
2580 
setDeferredOperation(const bool deferredOperation,const deUint32 workerThreadCount)2581 void TopLevelAccelerationStructureKHR::setDeferredOperation (const bool		deferredOperation,
2582 															 const deUint32	workerThreadCount)
2583 {
2584 	m_deferredOperation = deferredOperation;
2585 	m_workerThreadCount = workerThreadCount;
2586 }
2587 
setUseArrayOfPointers(const bool useArrayOfPointers)2588 void TopLevelAccelerationStructureKHR::setUseArrayOfPointers (const bool	useArrayOfPointers)
2589 {
2590 	m_useArrayOfPointers = useArrayOfPointers;
2591 }
2592 
setUsePPGeometries(const bool usePPGeometries)2593 void TopLevelAccelerationStructureKHR::setUsePPGeometries (const bool usePPGeometries)
2594 {
2595 	m_usePPGeometries = usePPGeometries;
2596 }
2597 
setTryCachedMemory(const bool tryCachedMemory)2598 void TopLevelAccelerationStructureKHR::setTryCachedMemory (const bool tryCachedMemory)
2599 {
2600 	m_tryCachedMemory = tryCachedMemory;
2601 }
2602 
setIndirectBuildParameters(const VkBuffer indirectBuffer,const VkDeviceSize indirectBufferOffset,const deUint32 indirectBufferStride)2603 void TopLevelAccelerationStructureKHR::setIndirectBuildParameters (const VkBuffer		indirectBuffer,
2604 																   const VkDeviceSize	indirectBufferOffset,
2605 																   const deUint32		indirectBufferStride)
2606 {
2607 	m_indirectBuffer		= indirectBuffer;
2608 	m_indirectBufferOffset	= indirectBufferOffset;
2609 	m_indirectBufferStride	= indirectBufferStride;
2610 }
2611 
// Accessor for the flags set via setBuildFlags().
VkBuildAccelerationStructureFlagsKHR TopLevelAccelerationStructureKHR::getBuildFlags () const
{
	return m_buildFlags;
}
2616 
sum() const2617 VkDeviceSize TopLevelAccelerationStructure::CreationSizes::sum () const
2618 {
2619 	return structure + updateScratch + buildScratch + instancePointers + instancesBuffer;
2620 }
2621 
// Report the memory sizes this TLAS would need, without creating anything.
// Exactly one of two inputs must be provided: bottom-level instances (sizes
// are then queried from the driver) or an explicit structureSize (targets of
// copy/compact/deserialize). Fills all fields of 'sizes'.
void TopLevelAccelerationStructureKHR::getCreationSizes (const DeviceInterface&	vk,
														 const VkDevice			device,
														 const VkDeviceSize		structureSize,
														 CreationSizes&			sizes)
{
	// AS may be built from geometries using vkCmdBuildAccelerationStructureKHR / vkBuildAccelerationStructureKHR
	// or may be copied/compacted/deserialized from other AS ( in this case AS does not need geometries, but it needs to know its size before creation ).
	DE_ASSERT(!m_bottomLevelInstances.empty() != !(structureSize == 0)); // logical xor

	if (structureSize == 0)
	{
		VkAccelerationStructureGeometryKHR		accelerationStructureGeometryKHR;
		const auto								accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
		std::vector<deUint32>					maxPrimitiveCounts;
		prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);

		// Build info mirrors the one used in create()/build(); dst/scratch are
		// null because this is only a size query.
		VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR		=
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,						//  VkStructureType										sType;
			DE_NULL,																				//  const void*											pNext;
			VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,											//  VkAccelerationStructureTypeKHR						type;
			m_buildFlags,																			//  VkBuildAccelerationStructureFlagsKHR				flags;
			VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,											//  VkBuildAccelerationStructureModeKHR					mode;
			DE_NULL,																				//  VkAccelerationStructureKHR							srcAccelerationStructure;
			DE_NULL,																				//  VkAccelerationStructureKHR							dstAccelerationStructure;
			1u,																						//  deUint32											geometryCount;
			(m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR),						//  const VkAccelerationStructureGeometryKHR*			pGeometries;
			(m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr),					//  const VkAccelerationStructureGeometryKHR* const*	ppGeometries;
			makeDeviceOrHostAddressKHR(DE_NULL)														//  VkDeviceOrHostAddressKHR							scratchData;
		};

		VkAccelerationStructureBuildSizesInfoKHR	sizeInfo =
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,	//  VkStructureType	sType;
			DE_NULL,														//  const void*		pNext;
			0,																//  VkDeviceSize	accelerationStructureSize;
			0,																//  VkDeviceSize	updateScratchSize;
			0,																//  VkDeviceSize	buildScratchSize;
		};

		vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);

		sizes.structure		= sizeInfo.accelerationStructureSize;
		sizes.updateScratch	= sizeInfo.updateScratchSize;
		sizes.buildScratch	= sizeInfo.buildScratchSize;
	}
	else
	{
		// Size known up front; no build, hence no scratch needed.
		sizes.structure		= structureSize;
		sizes.updateScratch	= 0u;
		sizes.buildScratch	= 0u;
	}

	// Optional array of per-instance pointers: device address size for device
	// builds, host pointer size for host builds.
	sizes.instancePointers	= 0u;
	if (m_useArrayOfPointers)
	{
		const size_t	pointerSize = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) ? sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress) : sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
		sizes.instancePointers		= static_cast<VkDeviceSize>(m_bottomLevelInstances.size() * pointerSize);
	}

	sizes.instancesBuffer = m_bottomLevelInstances.empty() ? 0u : m_bottomLevelInstances.size() * sizeof(VkAccelerationStructureInstanceKHR);
}
2684 
// Create the acceleration structure object and all backing buffers.
// Either bottom-level instances are set (sizes come from the driver) or an
// explicit structureSize is given (copy/compact/deserialize target) — never
// both. Allocates, in order: the AS backing buffer (with a cached-memory
// attempt and fallback), the VkAccelerationStructureKHR itself, scratch
// storage (device buffer or host vector), the optional instance-address
// buffer, and the instance buffer.
void TopLevelAccelerationStructureKHR::create (const DeviceInterface&				vk,
											   const VkDevice						device,
											   Allocator&							allocator,
											   VkDeviceSize							structureSize,
											   VkDeviceAddress						deviceAddress,
											   const void*							pNext,
											   const MemoryRequirement&				addMemoryRequirement)
{
	// AS may be built from geometries using vkCmdBuildAccelerationStructureKHR / vkBuildAccelerationStructureKHR
	// or may be copied/compacted/deserialized from other AS ( in this case AS does not need geometries, but it needs to know its size before creation ).
	DE_ASSERT(!m_bottomLevelInstances.empty() != !(structureSize == 0)); // logical xor

	if (structureSize == 0)
	{
		VkAccelerationStructureGeometryKHR		accelerationStructureGeometryKHR;
		const auto								accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
		std::vector<deUint32>					maxPrimitiveCounts;
		prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);

		// Size-query variant of the build info; dst/scratch left null.
		VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR		=
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,						//  VkStructureType										sType;
			DE_NULL,																				//  const void*											pNext;
			VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,											//  VkAccelerationStructureTypeKHR						type;
			m_buildFlags,																			//  VkBuildAccelerationStructureFlagsKHR				flags;
			VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,											//  VkBuildAccelerationStructureModeKHR					mode;
			DE_NULL,																				//  VkAccelerationStructureKHR							srcAccelerationStructure;
			DE_NULL,																				//  VkAccelerationStructureKHR							dstAccelerationStructure;
			1u,																						//  deUint32											geometryCount;
			(m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR),						//  const VkAccelerationStructureGeometryKHR*			pGeometries;
			(m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr),					//  const VkAccelerationStructureGeometryKHR* const*	ppGeometries;
			makeDeviceOrHostAddressKHR(DE_NULL)														//  VkDeviceOrHostAddressKHR							scratchData;
		};

		VkAccelerationStructureBuildSizesInfoKHR	sizeInfo =
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,	//  VkStructureType	sType;
			DE_NULL,														//  const void*		pNext;
			0,																//  VkDeviceSize	accelerationStructureSize;
			0,																//  VkDeviceSize	updateScratchSize;
			0																//  VkDeviceSize	buildScratchSize;
		};

		vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);

		m_structureSize		= sizeInfo.accelerationStructureSize;
		m_updateScratchSize	= sizeInfo.updateScratchSize;
		m_buildScratchSize	= sizeInfo.buildScratchSize;
	}
	else
	{
		// Size known up front; no build will happen, so no scratch needed.
		m_structureSize		= structureSize;
		m_updateScratchSize	= 0u;
		m_buildScratchSize	= 0u;
	}

	{
		const VkBufferCreateInfo	bufferCreateInfo	= makeBufferCreateInfo(m_structureSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
		const MemoryRequirement		memoryRequirement	= addMemoryRequirement | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress;

		// Prefer cached memory for the backing buffer; fall back to the plain
		// requirement when the implementation does not support it.
		try
		{
			m_accelerationStructureBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | memoryRequirement));
		}
		catch (const tcu::NotSupportedError&)
		{
			// retry without Cached flag
			m_accelerationStructureBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, memoryRequirement));
		}
	}

	{
		// Generic type may be requested instead of top-level (setCreateGeneric).
		const VkAccelerationStructureTypeKHR		structureType						= (m_createGeneric
																						   ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
																						   : VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR);
		const VkAccelerationStructureCreateInfoKHR	accelerationStructureCreateInfoKHR	=
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,	//  VkStructureType											sType;
			pNext,														//  const void*												pNext;
			m_createFlags,												//  VkAccelerationStructureCreateFlagsKHR					createFlags;
			m_accelerationStructureBuffer->get(),						//  VkBuffer												buffer;
			0u,															//  VkDeviceSize											offset;
			m_structureSize,											//  VkDeviceSize											size;
			structureType,												//  VkAccelerationStructureTypeKHR							type;
			deviceAddress												//  VkDeviceAddress											deviceAddress;
		};

		m_accelerationStructureKHR	= createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);
	}

	// Scratch storage: device-local buffer for device builds, plain host
	// vector for host builds.
	if (m_buildScratchSize > 0u)
	{
		if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
		{
			const VkBufferCreateInfo		bufferCreateInfo	= makeBufferCreateInfo(m_buildScratchSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
			m_deviceScratchBuffer								= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
		}
		else
		{
			m_hostScratchBuffer.resize(static_cast<size_t>(m_buildScratchSize));
		}
	}

	// Optional buffer of per-instance pointers/addresses (see setUseArrayOfPointers).
	if (m_useArrayOfPointers)
	{
		const size_t				pointerSize = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) ? sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress) : sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
		const VkBufferCreateInfo	bufferCreateInfo = makeBufferCreateInfo(static_cast<VkDeviceSize>(m_bottomLevelInstances.size() * pointerSize), VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
		m_instanceAddressBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
	}

	if(!m_bottomLevelInstances.empty())
		m_instanceBuffer = de::MovePtr<BufferWithMemory>(createInstanceBuffer(vk, device, allocator, m_bottomLevelInstances, m_instanceData, m_tryCachedMemory));
}
2798 
// Overwrite the transform matrix of one instance in place in the mapped
// instance buffer and flush so the device observes the change. Device
// builds only (asserted). Requires create() to have allocated the buffer.
void TopLevelAccelerationStructureKHR::updateInstanceMatrix (const DeviceInterface& vk, const VkDevice device, size_t instanceIndex, const VkTransformMatrixKHR& matrix)
{
	DE_ASSERT(m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR);
	DE_ASSERT(instanceIndex < m_bottomLevelInstances.size());
	DE_ASSERT(instanceIndex < m_instanceData.size());

	const auto&		blas			= *m_bottomLevelInstances[instanceIndex];
	auto&			instanceData	= m_instanceData[instanceIndex];
	auto&			instancesAlloc	= m_instanceBuffer->getAllocation();
	auto			bufferStart		= reinterpret_cast<deUint8*>(instancesAlloc.getHostPtr());
	// Instances are stored tightly packed, one VkAccelerationStructureInstanceKHR each.
	VkDeviceSize	bufferOffset	= sizeof(VkAccelerationStructureInstanceKHR) * instanceIndex;

	instanceData.matrix = matrix;
	updateSingleInstance(vk, device, blas, instanceData, bufferStart + bufferOffset, m_buildType, m_inactiveInstances);
	flushMappedMemoryRange(vk, device, instancesAlloc.getMemory(), instancesAlloc.getOffset(), VK_WHOLE_SIZE);
}
2815 
// Build the TLAS. Device builds record into cmdBuffer (directly or, when
// setIndirectBuildParameters() was used, via the indirect variant) and end
// with a memory barrier; host builds execute immediately, optionally through
// a deferred operation. create() must have been called first (asserted).
void TopLevelAccelerationStructureKHR::build (const DeviceInterface&	vk,
											  const VkDevice			device,
											  const VkCommandBuffer		cmdBuffer)
{
	DE_ASSERT(!m_bottomLevelInstances.empty());
	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
	DE_ASSERT(m_buildScratchSize != 0);

	// Refresh instance buffer contents from the current BLAS set and instance data.
	updateInstanceBuffer(vk, device, m_bottomLevelInstances, m_instanceData, m_instanceBuffer.get(), m_buildType, m_inactiveInstances);

	VkAccelerationStructureGeometryKHR		accelerationStructureGeometryKHR;
	const auto								accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
	std::vector<deUint32>					maxPrimitiveCounts;
	prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);

	// Device builds consume the device scratch buffer; host builds the host vector.
	VkDeviceOrHostAddressKHR				scratchData										= (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
																							? makeDeviceOrHostAddressKHR(vk, device, m_deviceScratchBuffer->get(), 0)
																							: makeDeviceOrHostAddressKHR(m_hostScratchBuffer.data());

	VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR		=
	{
		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,						//  VkStructureType										sType;
		DE_NULL,																				//  const void*											pNext;
		VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,											//  VkAccelerationStructureTypeKHR						type;
		m_buildFlags,																			//  VkBuildAccelerationStructureFlagsKHR				flags;
		VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,											//  VkBuildAccelerationStructureModeKHR					mode;
		DE_NULL,																				//  VkAccelerationStructureKHR							srcAccelerationStructure;
		m_accelerationStructureKHR.get(),														//  VkAccelerationStructureKHR							dstAccelerationStructure;
		1u,																						//  deUint32											geometryCount;
		(m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR),						//  const VkAccelerationStructureGeometryKHR*			pGeometries;
		(m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr),					//  const VkAccelerationStructureGeometryKHR* const*	ppGeometries;
		scratchData																				//  VkDeviceOrHostAddressKHR							scratchData;
	};

	// setBuildWithoutPrimitives() forces an empty build range.
	const deUint32 primitiveCount = (m_buildWithoutPrimitives ? 0u : static_cast<deUint32>(m_bottomLevelInstances.size()));

	VkAccelerationStructureBuildRangeInfoKHR accelerationStructureBuildRangeInfoKHR =
	{
		primitiveCount,	//  deUint32	primitiveCount;
		0,				//  deUint32	primitiveOffset;
		0,				//  deUint32	firstVertex;
		0				//  deUint32	transformOffset;
	};
	VkAccelerationStructureBuildRangeInfoKHR* accelerationStructureBuildRangeInfoKHRPtr	= &accelerationStructureBuildRangeInfoKHR;

	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		if (m_indirectBuffer == DE_NULL)
			vk.cmdBuildAccelerationStructuresKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
		else
		{
			// Indirect build: range info comes from the indirect buffer configured
			// via setIndirectBuildParameters().
			VkDeviceAddress	indirectDeviceAddress = getBufferDeviceAddress(vk, device, m_indirectBuffer, m_indirectBufferOffset);
			deUint32*		pMaxPrimitiveCounts = maxPrimitiveCounts.data();
			vk.cmdBuildAccelerationStructuresIndirectKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, &indirectDeviceAddress, &m_indirectBufferStride, &pMaxPrimitiveCounts);
		}
	}
	else if (!m_deferredOperation)
	{
		VK_CHECK(vk.buildAccelerationStructuresKHR(device, DE_NULL, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr));
	}
	else
	{
		const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
		const auto deferredOperation	= deferredOperationPtr.get();

		VkResult result = vk.buildAccelerationStructuresKHR(device, deferredOperation, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);

		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);

		finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);

		// NOTE(review): dead store — this local goes out of scope immediately
		// after; looks like leftover cleanup, confirm it can be removed.
		accelerationStructureBuildGeometryInfoKHR.pNext = DE_NULL;
	}

	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		// Make the freshly built AS visible to all subsequent commands.
		const VkAccessFlags		accessMasks	= VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
		const VkMemoryBarrier	memBarrier	= makeMemoryBarrier(accessMasks, accessMasks);

		cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
	}
}
2898 
copyFrom(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,TopLevelAccelerationStructure * accelerationStructure,bool compactCopy)2899 void TopLevelAccelerationStructureKHR::copyFrom (const DeviceInterface&				vk,
2900 												 const VkDevice						device,
2901 												 const VkCommandBuffer				cmdBuffer,
2902 												 TopLevelAccelerationStructure*		accelerationStructure,
2903 												 bool								compactCopy)
2904 {
2905 	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
2906 	DE_ASSERT(accelerationStructure != DE_NULL);
2907 
2908 	VkCopyAccelerationStructureInfoKHR copyAccelerationStructureInfo =
2909 	{
2910 		VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR,															// VkStructureType						sType;
2911 		DE_NULL,																										// const void*							pNext;
2912 		*(accelerationStructure->getPtr()),																				// VkAccelerationStructureKHR			src;
2913 		*(getPtr()),																									// VkAccelerationStructureKHR			dst;
2914 		compactCopy ? VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR : VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR	// VkCopyAccelerationStructureModeKHR	mode;
2915 	};
2916 
2917 	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2918 	{
2919 		vk.cmdCopyAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
2920 	}
2921 	else if (!m_deferredOperation)
2922 	{
2923 		VK_CHECK(vk.copyAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
2924 	}
2925 	else
2926 	{
2927 		const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
2928 		const auto deferredOperation	= deferredOperationPtr.get();
2929 
2930 		VkResult result = vk.copyAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);
2931 
2932 		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
2933 
2934 		finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
2935 	}
2936 
2937 	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2938 	{
2939 		const VkAccessFlags		accessMasks	= VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
2940 		const VkMemoryBarrier	memBarrier	= makeMemoryBarrier(accessMasks, accessMasks);
2941 
2942 		cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
2943 	}
2944 
2945 }
2946 
// Serialize this TLAS into the given SerialStorage. For "deep" storage
// formats the referenced bottom-level structures are serialized too
// (serializeBottoms). Device path records into cmdBuffer; host path runs
// immediately, optionally through a deferred operation.
void TopLevelAccelerationStructureKHR::serialize (const DeviceInterface&	vk,
												  const VkDevice			device,
												  const VkCommandBuffer		cmdBuffer,
												  SerialStorage*			storage)
{
	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
	DE_ASSERT(storage != DE_NULL);

	const VkCopyAccelerationStructureToMemoryInfoKHR	copyAccelerationStructureInfo	=
	{
		VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_INFO_KHR,	// VkStructureType						sType;
		DE_NULL,															// const void*							pNext;
		*(getPtr()),														// VkAccelerationStructureKHR			src;
		storage->getAddress(vk, device, m_buildType),						// VkDeviceOrHostAddressKHR				dst;
		VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR					// VkCopyAccelerationStructureModeKHR	mode;
	};

	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		vk.cmdCopyAccelerationStructureToMemoryKHR(cmdBuffer, &copyAccelerationStructureInfo);
		if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, DE_NULL);
	}
	else if (!m_deferredOperation)
	{
		VK_CHECK(vk.copyAccelerationStructureToMemoryKHR(device, DE_NULL, &copyAccelerationStructureInfo));
		if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, DE_NULL);
	}
	else
	{
		const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
		const auto deferredOperation	= deferredOperationPtr.get();

		const VkResult result = vk.copyAccelerationStructureToMemoryKHR(device, deferredOperation, &copyAccelerationStructureInfo);

		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
		// Bottoms are serialized before the deferred operation is finished,
		// passing the operation handle through to serializeBottoms.
		if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, deferredOperation);

		finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
	}
}
2987 
// Deserialize this TLAS from the given SerialStorage. Device path records
// into cmdBuffer and appends a memory barrier; host path runs immediately,
// optionally through a deferred operation. create() must have been called
// with the serialized size beforehand (asserted).
void TopLevelAccelerationStructureKHR::deserialize (const DeviceInterface&	vk,
													const VkDevice			device,
													const VkCommandBuffer	cmdBuffer,
													SerialStorage*			storage)
{
	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
	DE_ASSERT(storage != DE_NULL);

	const VkCopyMemoryToAccelerationStructureInfoKHR	copyAccelerationStructureInfo	=
	{
		VK_STRUCTURE_TYPE_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_INFO_KHR,	// VkStructureType							sType;
		DE_NULL,															// const void*								pNext;
		storage->getAddressConst(vk, device, m_buildType),					// VkDeviceOrHostAddressConstKHR			src;
		*(getPtr()),														// VkAccelerationStructureKHR				dst;
		VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR					// VkCopyAccelerationStructureModeKHR		mode;
	};

	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		vk.cmdCopyMemoryToAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
	}
	else if (!m_deferredOperation)
	{
		VK_CHECK(vk.copyMemoryToAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
	}
	else
	{
		const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
		const auto deferredOperation	= deferredOperationPtr.get();

		const VkResult result = vk.copyMemoryToAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);

		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);

		finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
	}

	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		// Make the deserialized AS visible to all subsequent commands.
		const VkAccessFlags		accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
		const VkMemoryBarrier	memBarrier = makeMemoryBarrier(accessMasks, accessMasks);

		cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
	}
}
3033 
serializeBottoms(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,SerialStorage * storage,VkDeferredOperationKHR deferredOperation)3034 void TopLevelAccelerationStructureKHR::serializeBottoms (const DeviceInterface&	vk,
3035 														 const VkDevice			device,
3036 														 const VkCommandBuffer	cmdBuffer,
3037 														 SerialStorage*			storage,
3038 														 VkDeferredOperationKHR	deferredOperation)
3039 {
3040 	DE_UNREF(deferredOperation);
3041 	DE_ASSERT(storage->hasDeepFormat());
3042 
3043 	const std::vector<deUint64>&	addresses		= storage->getSerialInfo().addresses();
3044 	const std::size_t				cbottoms		= m_bottomLevelInstances.size();
3045 
3046 	deUint32						storageIndex	= 0;
3047 	std::vector<deUint64>			matches;
3048 
3049 	for (std::size_t i = 0; i < cbottoms; ++i)
3050 	{
3051 		const deUint64& lookAddr	= addresses[i+1];
3052 		auto			end			= matches.end();
3053 		auto			match		= std::find_if(matches.begin(), end, [&](const deUint64& item){ return item == lookAddr; });
3054 		if (match == end)
3055 		{
3056 			matches.emplace_back(lookAddr);
3057 			m_bottomLevelInstances[i].get()->serialize(vk, device, cmdBuffer, storage->getBottomStorage(storageIndex).get());
3058 			storageIndex += 1;
3059 		}
3060 	}
3061 }
3062 
createAndDeserializeBottoms(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,Allocator & allocator,SerialStorage * storage)3063 void TopLevelAccelerationStructureKHR::createAndDeserializeBottoms (const DeviceInterface&	vk,
3064 																	const VkDevice			device,
3065 																	const VkCommandBuffer	cmdBuffer,
3066 																	Allocator&				allocator,
3067 																	SerialStorage*			storage)
3068 {
3069 	DE_ASSERT(storage->hasDeepFormat());
3070 	DE_ASSERT(m_bottomLevelInstances.size() == 0);
3071 
3072 	const std::vector<deUint64>&					addresses		= storage->getSerialInfo().addresses();
3073 	const std::size_t								cbottoms		= addresses.size() - 1;
3074 	deUint32										storageIndex	= 0;
3075 	std::vector<std::pair<deUint64, std::size_t>>	matches;
3076 
3077 	for (std::size_t i = 0; i < cbottoms; ++i)
3078 	{
3079 		const deUint64& lookAddr	= addresses[i+1];
3080 		auto			end			= matches.end();
3081 		auto			match		= std::find_if(matches.begin(), end, [&](const std::pair<deUint64, std::size_t>& item){ return item.first == lookAddr; });
3082 		if (match != end)
3083 		{
3084 			m_bottomLevelInstances .emplace_back(m_bottomLevelInstances[match->second]);
3085 		}
3086 		else
3087 		{
3088 			de::MovePtr<BottomLevelAccelerationStructure> blas = makeBottomLevelAccelerationStructure();
3089 			blas->createAndDeserializeFrom(vk, device, cmdBuffer, allocator, storage->getBottomStorage(storageIndex).get());
3090 			m_bottomLevelInstances.emplace_back(de::SharedPtr<BottomLevelAccelerationStructure>(blas.release()));
3091 			matches.emplace_back(lookAddr, i);
3092 			storageIndex += 1;
3093 		}
3094 	}
3095 
3096 	std::vector<deUint64>						newAddresses	= getSerializingAddresses(vk, device);
3097 	DE_ASSERT(addresses.size() == newAddresses.size());
3098 
3099 	SerialStorage::AccelerationStructureHeader* header			= storage->getASHeader();
3100 	DE_ASSERT(cbottoms ==header->handleCount);
3101 
3102 	// finally update bottom-level AS addresses before top-level AS deserialization
3103 	for (std::size_t i = 0; i < cbottoms; ++i)
3104 	{
3105 		header->handleArray[i] = newAddresses[i+1];
3106 	}
3107 }
3108 
getSerializingSizes(const DeviceInterface & vk,const VkDevice device,const VkQueue queue,const deUint32 queueFamilyIndex)3109 std::vector<VkDeviceSize> TopLevelAccelerationStructureKHR::getSerializingSizes (const DeviceInterface&	vk,
3110 																				 const VkDevice			device,
3111 																				 const VkQueue			queue,
3112 																				 const deUint32			queueFamilyIndex)
3113 {
3114 	const deUint32							queryCount(deUint32(m_bottomLevelInstances.size()) + 1);
3115 	std::vector<VkAccelerationStructureKHR>	handles(queryCount);
3116 	std::vector<VkDeviceSize>				sizes(queryCount);
3117 
3118 	handles[0] = m_accelerationStructureKHR.get();
3119 
3120 	for (deUint32 h = 1; h < queryCount; ++h)
3121 		handles[h] = *m_bottomLevelInstances[h-1].get()->getPtr();
3122 
3123 	if (VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR == m_buildType)
3124 		queryAccelerationStructureSize(vk, device, DE_NULL, handles, m_buildType, DE_NULL, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, sizes);
3125 	else
3126 	{
3127 		const Move<VkCommandPool>	cmdPool		= createCommandPool(vk, device, 0, queueFamilyIndex);
3128 		const Move<VkCommandBuffer>	cmdBuffer	= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3129 		const Move<VkQueryPool>		queryPool	= makeQueryPool(vk, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, queryCount);
3130 
3131 		beginCommandBuffer(vk, *cmdBuffer);
3132 		queryAccelerationStructureSize(vk, device, *cmdBuffer, handles, m_buildType, *queryPool, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, sizes);
3133 		endCommandBuffer(vk, *cmdBuffer);
3134 		submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
3135 
3136 		VK_CHECK(vk.getQueryPoolResults(device, *queryPool, 0u, queryCount, queryCount * sizeof(VkDeviceSize), sizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
3137 	}
3138 
3139 	return sizes;
3140 }
3141 
getSerializingAddresses(const DeviceInterface & vk,const VkDevice device) const3142 std::vector<deUint64> TopLevelAccelerationStructureKHR::getSerializingAddresses (const DeviceInterface& vk, const VkDevice device) const
3143 {
3144 	std::vector<deUint64> result(m_bottomLevelInstances.size() + 1);
3145 
3146 	VkAccelerationStructureDeviceAddressInfoKHR asDeviceAddressInfo =
3147 	{
3148 		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,	// VkStructureType				sType;
3149 		DE_NULL,															// const void*					pNext;
3150 		DE_NULL																// VkAccelerationStructureKHR	accelerationStructure;
3151 	};
3152 
3153 	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3154 	{
3155 		asDeviceAddressInfo.accelerationStructure = m_accelerationStructureKHR.get();
3156 		result[0] = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
3157 	}
3158 	else
3159 	{
3160 		result[0] = deUint64(getPtr()->getInternal());
3161 	}
3162 
3163 	for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
3164 	{
3165 		const BottomLevelAccelerationStructure&		bottomLevelAccelerationStructure	= *m_bottomLevelInstances[instanceNdx];
3166 		const VkAccelerationStructureKHR			accelerationStructureKHR			= *bottomLevelAccelerationStructure.getPtr();
3167 
3168 		if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3169 		{
3170 			asDeviceAddressInfo.accelerationStructure = accelerationStructureKHR;
3171 			result[instanceNdx+1] = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
3172 		}
3173 		else
3174 		{
3175 			result[instanceNdx+1] = deUint64(accelerationStructureKHR.getInternal());
3176 		}
3177 	}
3178 
3179 	return result;
3180 }
3181 
getPtr(void) const3182 const VkAccelerationStructureKHR* TopLevelAccelerationStructureKHR::getPtr (void) const
3183 {
3184 	return &m_accelerationStructureKHR.get();
3185 }
3186 
// Fills in the instance geometry description for building this top-level AS:
// maxPrimitiveCounts gets a single entry (the number of bottom-level
// instances), and accelerationStructureGeometryKHR receives the instance data
// either as device addresses or host pointers, depending on the build type.
// When m_useArrayOfPointers is set, an auxiliary buffer holding one
// address/pointer per instance is populated and referenced instead of the
// packed instance array.
void TopLevelAccelerationStructureKHR::prepareInstances (const DeviceInterface&							vk,
														 const VkDevice									device,
														 VkAccelerationStructureGeometryKHR&			accelerationStructureGeometryKHR,
														 std::vector<deUint32>&							maxPrimitiveCounts)
{
	maxPrimitiveCounts.resize(1);
	maxPrimitiveCounts[0] = static_cast<deUint32>(m_bottomLevelInstances.size());

	VkDeviceOrHostAddressConstKHR							instancesData;
	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		if(m_instanceBuffer.get() != DE_NULL)
		{
			if (m_useArrayOfPointers)
			{
				// Write one device address per instance into the (host-mapped)
				// address buffer; each entry points into the packed instance buffer.
				deUint8*						bufferStart			= static_cast<deUint8*>(m_instanceAddressBuffer->getAllocation().getHostPtr());
				VkDeviceSize					bufferOffset		= 0;
				VkDeviceOrHostAddressConstKHR	firstInstance		= makeDeviceOrHostAddressConstKHR(vk, device, m_instanceBuffer->get(), 0);
				for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
				{
					VkDeviceOrHostAddressConstKHR	currentInstance;
					currentInstance.deviceAddress	= firstInstance.deviceAddress + instanceNdx * sizeof(VkAccelerationStructureInstanceKHR);

					// Only the address member is copied, so entries are tightly packed.
					deMemcpy(&bufferStart[bufferOffset], &currentInstance, sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress));
					bufferOffset += sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress);
				}
				// Make the host writes visible to the device before building.
				flushMappedMemoryRange(vk, device, m_instanceAddressBuffer->getAllocation().getMemory(), m_instanceAddressBuffer->getAllocation().getOffset(), VK_WHOLE_SIZE);

				instancesData = makeDeviceOrHostAddressConstKHR(vk, device, m_instanceAddressBuffer->get(), 0);
			}
			else
				instancesData = makeDeviceOrHostAddressConstKHR(vk, device, m_instanceBuffer->get(), 0);
		}
		else
			instancesData = makeDeviceOrHostAddressConstKHR(DE_NULL);
	}
	else
	{
		if (m_instanceBuffer.get() != DE_NULL)
		{
			if (m_useArrayOfPointers)
			{
				// Host build: the address buffer holds host pointers into the
				// instance buffer instead of device addresses.
				deUint8*						bufferStart			= static_cast<deUint8*>(m_instanceAddressBuffer->getAllocation().getHostPtr());
				VkDeviceSize					bufferOffset		= 0;
				for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
				{
					VkDeviceOrHostAddressConstKHR	currentInstance;
					currentInstance.hostAddress	= (deUint8*)m_instanceBuffer->getAllocation().getHostPtr() + instanceNdx * sizeof(VkAccelerationStructureInstanceKHR);

					deMemcpy(&bufferStart[bufferOffset], &currentInstance, sizeof(VkDeviceOrHostAddressConstKHR::hostAddress));
					bufferOffset += sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
				}
				instancesData = makeDeviceOrHostAddressConstKHR(m_instanceAddressBuffer->getAllocation().getHostPtr());
			}
			else
				instancesData = makeDeviceOrHostAddressConstKHR(m_instanceBuffer->getAllocation().getHostPtr());
		}
		else
			instancesData = makeDeviceOrHostAddressConstKHR(DE_NULL);
	}

	VkAccelerationStructureGeometryInstancesDataKHR accelerationStructureGeometryInstancesDataKHR	=
	{
		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR,	//  VkStructureType					sType;
		DE_NULL,																//  const void*						pNext;
		(VkBool32)( m_useArrayOfPointers ? DE_TRUE : DE_FALSE ),				//  VkBool32						arrayOfPointers;
		instancesData															//  VkDeviceOrHostAddressConstKHR	data;
	};

	accelerationStructureGeometryKHR					=
	{
		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,										//  VkStructureType							sType;
		DE_NULL,																					//  const void*								pNext;
		VK_GEOMETRY_TYPE_INSTANCES_KHR,																//  VkGeometryTypeKHR						geometryType;
		makeVkAccelerationStructureInstancesDataKHR(accelerationStructureGeometryInstancesDataKHR),	//  VkAccelerationStructureGeometryDataKHR	geometry;
		(VkGeometryFlagsKHR)0u																		//  VkGeometryFlagsKHR						flags;
	};
}
3265 
getRequiredAllocationCount(void)3266 deUint32 TopLevelAccelerationStructure::getRequiredAllocationCount (void)
3267 {
3268 	return TopLevelAccelerationStructureKHR::getRequiredAllocationCount();
3269 }
3270 
makeTopLevelAccelerationStructure()3271 de::MovePtr<TopLevelAccelerationStructure> makeTopLevelAccelerationStructure ()
3272 {
3273 	return de::MovePtr<TopLevelAccelerationStructure>(new TopLevelAccelerationStructureKHR);
3274 }
3275 
queryAccelerationStructureSizeKHR(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,const std::vector<VkAccelerationStructureKHR> & accelerationStructureHandles,VkAccelerationStructureBuildTypeKHR buildType,const VkQueryPool queryPool,VkQueryType queryType,deUint32 firstQuery,std::vector<VkDeviceSize> & results)3276 bool queryAccelerationStructureSizeKHR (const DeviceInterface&							vk,
3277 										const VkDevice									device,
3278 										const VkCommandBuffer							cmdBuffer,
3279 										const std::vector<VkAccelerationStructureKHR>&	accelerationStructureHandles,
3280 										VkAccelerationStructureBuildTypeKHR				buildType,
3281 										const VkQueryPool								queryPool,
3282 										VkQueryType										queryType,
3283 										deUint32										firstQuery,
3284 										std::vector<VkDeviceSize>&						results)
3285 {
3286 	DE_ASSERT(queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR || queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR);
3287 
3288 	if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3289 	{
3290 		// queryPool must be large enough to contain at least (firstQuery + accelerationStructureHandles.size()) queries
3291 		vk.cmdResetQueryPool(cmdBuffer, queryPool, firstQuery, deUint32(accelerationStructureHandles.size()));
3292 		vk.cmdWriteAccelerationStructuresPropertiesKHR(cmdBuffer, deUint32(accelerationStructureHandles.size()), accelerationStructureHandles.data(), queryType, queryPool, firstQuery);
3293 		// results cannot be retrieved to CPU at the moment - you need to do it using getQueryPoolResults after cmdBuffer is executed. Meanwhile function returns a vector of 0s.
3294 		results.resize(accelerationStructureHandles.size(), 0u);
3295 		return false;
3296 	}
3297 	// buildType != VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR
3298 	results.resize(accelerationStructureHandles.size(), 0u);
3299 	vk.writeAccelerationStructuresPropertiesKHR(device, deUint32(accelerationStructureHandles.size()), accelerationStructureHandles.data(), queryType,
3300 												sizeof(VkDeviceSize) * accelerationStructureHandles.size(), results.data(), sizeof(VkDeviceSize));
3301 	// results will contain proper values
3302 	return true;
3303 }
3304 
queryAccelerationStructureSize(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,const std::vector<VkAccelerationStructureKHR> & accelerationStructureHandles,VkAccelerationStructureBuildTypeKHR buildType,const VkQueryPool queryPool,VkQueryType queryType,deUint32 firstQuery,std::vector<VkDeviceSize> & results)3305 bool queryAccelerationStructureSize (const DeviceInterface&							vk,
3306 									 const VkDevice									device,
3307 									 const VkCommandBuffer							cmdBuffer,
3308 									 const std::vector<VkAccelerationStructureKHR>&	accelerationStructureHandles,
3309 									 VkAccelerationStructureBuildTypeKHR			buildType,
3310 									 const VkQueryPool								queryPool,
3311 									 VkQueryType									queryType,
3312 									 deUint32										firstQuery,
3313 									 std::vector<VkDeviceSize>&						results)
3314 {
3315 	return queryAccelerationStructureSizeKHR(vk, device, cmdBuffer, accelerationStructureHandles, buildType, queryPool, queryType, firstQuery, results);
3316 }
3317 
RayTracingPipeline()3318 RayTracingPipeline::RayTracingPipeline ()
3319 	: m_shadersModules			()
3320 	, m_pipelineLibraries		()
3321 	, m_shaderCreateInfos		()
3322 	, m_shadersGroupCreateInfos	()
3323 	, m_pipelineCreateFlags		(0U)
3324 	, m_maxRecursionDepth		(1U)
3325 	, m_maxPayloadSize			(0U)
3326 	, m_maxAttributeSize		(0U)
3327 	, m_deferredOperation		(false)
3328 	, m_workerThreadCount		(0)
3329 {
3330 }
3331 
~RayTracingPipeline()3332 RayTracingPipeline::~RayTracingPipeline ()
3333 {
3334 }
3335 
// Assigns STAGE to SHADER only when the slot is still unset
// (VK_SHADER_UNUSED_KHR); throws InternalError if a shader slot in a group
// would be assigned twice.
#define CHECKED_ASSIGN_SHADER(SHADER, STAGE)						\
	if (SHADER == VK_SHADER_UNUSED_KHR)								\
		SHADER = STAGE;												\
	else															\
		TCU_THROW(InternalError, "Attempt to reassign shader")
3341 
addShader(VkShaderStageFlagBits shaderStage,Move<VkShaderModule> shaderModule,deUint32 group,const VkSpecializationInfo * specializationInfo,const VkPipelineShaderStageCreateFlags pipelineShaderStageCreateFlags,const void * pipelineShaderStageCreateInfopNext)3342 void RayTracingPipeline::addShader (VkShaderStageFlagBits					shaderStage,
3343 									Move<VkShaderModule>					shaderModule,
3344 									deUint32								group,
3345 									const VkSpecializationInfo*				specializationInfo,
3346 									const VkPipelineShaderStageCreateFlags	pipelineShaderStageCreateFlags,
3347 									const void*								pipelineShaderStageCreateInfopNext)
3348 {
3349 	addShader(shaderStage, makeVkSharedPtr(shaderModule), group, specializationInfo, pipelineShaderStageCreateFlags, pipelineShaderStageCreateInfopNext);
3350 }
3351 
addShader(VkShaderStageFlagBits shaderStage,de::SharedPtr<Move<VkShaderModule>> shaderModule,deUint32 group,const VkSpecializationInfo * specializationInfoPtr,const VkPipelineShaderStageCreateFlags pipelineShaderStageCreateFlags,const void * pipelineShaderStageCreateInfopNext)3352 void RayTracingPipeline::addShader (VkShaderStageFlagBits					shaderStage,
3353 									de::SharedPtr<Move<VkShaderModule>>		shaderModule,
3354 									deUint32								group,
3355 									const VkSpecializationInfo*				specializationInfoPtr,
3356 									const VkPipelineShaderStageCreateFlags	pipelineShaderStageCreateFlags,
3357 									const void*								pipelineShaderStageCreateInfopNext)
3358 {
3359 	addShader(shaderStage, **shaderModule, group, specializationInfoPtr, pipelineShaderStageCreateFlags, pipelineShaderStageCreateInfopNext);
3360 	m_shadersModules.push_back(shaderModule);
3361 }
3362 
// Registers a shader stage and records it in shader group 'group'. Groups are
// created on demand up to the requested index, each starting with all shader
// slots unused. The group's type is derived from the stage: raygen/miss/
// callable produce GENERAL groups; any-hit/closest-hit/intersection produce a
// TRIANGLES or PROCEDURAL hit group (PROCEDURAL when an intersection shader is
// present). Throws InternalError on shader-slot reuse or an unsupported stage.
void RayTracingPipeline::addShader (VkShaderStageFlagBits					shaderStage,
									VkShaderModule							shaderModule,
									deUint32								group,
									const VkSpecializationInfo*				specializationInfoPtr,
									const VkPipelineShaderStageCreateFlags	pipelineShaderStageCreateFlags,
									const void*								pipelineShaderStageCreateInfopNext)
{
	// Grow the group array up to and including 'group'; new groups start with a
	// sentinel type (MAX_ENUM) that is replaced once a stage is assigned below.
	if (group >= m_shadersGroupCreateInfos.size())
	{
		for (size_t groupNdx = m_shadersGroupCreateInfos.size(); groupNdx <= group; ++groupNdx)
		{
			VkRayTracingShaderGroupCreateInfoKHR	shaderGroupCreateInfo	=
			{
				VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR,	//  VkStructureType					sType;
				DE_NULL,													//  const void*						pNext;
				VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR,				//  VkRayTracingShaderGroupTypeKHR	type;
				VK_SHADER_UNUSED_KHR,										//  deUint32						generalShader;
				VK_SHADER_UNUSED_KHR,										//  deUint32						closestHitShader;
				VK_SHADER_UNUSED_KHR,										//  deUint32						anyHitShader;
				VK_SHADER_UNUSED_KHR,										//  deUint32						intersectionShader;
				DE_NULL,													//  const void*						pShaderGroupCaptureReplayHandle;
			};

			m_shadersGroupCreateInfos.push_back(shaderGroupCreateInfo);
		}
	}

	// The stage's position in m_shaderCreateInfos is its shader index in the group.
	const deUint32							shaderStageNdx			= (deUint32)m_shaderCreateInfos.size();
	VkRayTracingShaderGroupCreateInfoKHR&	shaderGroupCreateInfo	= m_shadersGroupCreateInfos[group];

	// Record the stage index in the matching group slot (throws if already set).
	switch (shaderStage)
	{
		case VK_SHADER_STAGE_RAYGEN_BIT_KHR:		CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader,		shaderStageNdx);	break;
		case VK_SHADER_STAGE_MISS_BIT_KHR:			CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader,		shaderStageNdx);	break;
		case VK_SHADER_STAGE_CALLABLE_BIT_KHR:		CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader,		shaderStageNdx);	break;
		case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:		CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.anyHitShader,		shaderStageNdx);	break;
		case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:	CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.closestHitShader,	shaderStageNdx);	break;
		case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:	CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.intersectionShader,	shaderStageNdx);	break;
		default:									TCU_THROW(InternalError, "Unacceptable stage");
	}

	// Derive the group type from the stage; the asserts guard against mixing
	// general and hit-group stages within the same group.
	switch (shaderStage)
	{
		case VK_SHADER_STAGE_RAYGEN_BIT_KHR:
		case VK_SHADER_STAGE_MISS_BIT_KHR:
		case VK_SHADER_STAGE_CALLABLE_BIT_KHR:
		{
			DE_ASSERT(shaderGroupCreateInfo.type == VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR);
			shaderGroupCreateInfo.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR;

			break;
		}

		case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:
		case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:
		case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:
		{
			DE_ASSERT(shaderGroupCreateInfo.type != VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR);
			shaderGroupCreateInfo.type	= (shaderGroupCreateInfo.intersectionShader == VK_SHADER_UNUSED_KHR)
										? VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR
										: VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR;

			break;
		}

		default: TCU_THROW(InternalError, "Unacceptable stage");
	}

	{
		// Finally append the per-stage create info; entry point is always "main".
		const VkPipelineShaderStageCreateInfo	shaderCreateInfo	=
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	//  VkStructureType						sType;
			pipelineShaderStageCreateInfopNext,						//  const void*							pNext;
			pipelineShaderStageCreateFlags,							//  VkPipelineShaderStageCreateFlags	flags;
			shaderStage,											//  VkShaderStageFlagBits				stage;
			shaderModule,											//  VkShaderModule						module;
			"main",													//  const char*							pName;
			specializationInfoPtr,									//  const VkSpecializationInfo*			pSpecializationInfo;
		};

		m_shaderCreateInfos.push_back(shaderCreateInfo);
	}
}
3446 
addLibrary(de::SharedPtr<de::MovePtr<RayTracingPipeline>> pipelineLibrary)3447 void RayTracingPipeline::addLibrary (de::SharedPtr<de::MovePtr<RayTracingPipeline>> pipelineLibrary)
3448 {
3449 	m_pipelineLibraries.push_back(pipelineLibrary);
3450 }
3451 
// Creates the ray tracing pipeline from the accumulated stages, groups and
// libraries. Honors m_deferredOperation (creation through a deferred host
// operation finished on m_workerThreadCount threads) and throws
// CompileRequiredError when FAIL_ON_PIPELINE_COMPILE_REQUIRED is set and the
// driver reports VK_PIPELINE_COMPILE_REQUIRED.
Move<VkPipeline> RayTracingPipeline::createPipelineKHR (const DeviceInterface&			vk,
														const VkDevice					device,
														const VkPipelineLayout			pipelineLayout,
														const std::vector<VkPipeline>&	pipelineLibraries,
														const VkPipelineCache			pipelineCache)
{
	// Every group must have been populated through addShader().
	for (size_t groupNdx = 0; groupNdx < m_shadersGroupCreateInfos.size(); ++groupNdx)
		DE_ASSERT(m_shadersGroupCreateInfos[groupNdx].sType == VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR);

	VkPipelineLibraryCreateInfoKHR				librariesCreateInfo	=
	{
		VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR,	//  VkStructureType	sType;
		DE_NULL,											//  const void*		pNext;
		de::sizeU32(pipelineLibraries),						//  deUint32		libraryCount;
		de::dataOrNull(pipelineLibraries)					//  VkPipeline*		pLibraries;
	};
	const VkRayTracingPipelineInterfaceCreateInfoKHR	pipelineInterfaceCreateInfo		=
	{
		VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_INTERFACE_CREATE_INFO_KHR,	//  VkStructureType	sType;
		DE_NULL,															//  const void*		pNext;
		m_maxPayloadSize,													//  deUint32		maxPayloadSize;
		m_maxAttributeSize													//  deUint32		maxAttributeSize;
	};
	// The interface struct is only passed when the caller configured a payload
	// or attribute size; likewise the library info only when libraries exist.
	const bool											addPipelineInterfaceCreateInfo	= m_maxPayloadSize != 0 || m_maxAttributeSize != 0;
	const VkRayTracingPipelineInterfaceCreateInfoKHR*	pipelineInterfaceCreateInfoPtr	= addPipelineInterfaceCreateInfo ? &pipelineInterfaceCreateInfo : DE_NULL;
	const VkPipelineLibraryCreateInfoKHR*				librariesCreateInfoPtr			= (pipelineLibraries.empty() ? nullptr : &librariesCreateInfo);

	Move<VkDeferredOperationKHR>						deferredOperation;
	if (m_deferredOperation)
		deferredOperation = createDeferredOperationKHR(vk, device);

	VkPipelineDynamicStateCreateInfo dynamicStateCreateInfo	=
	{
		VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,	// VkStructureType						sType;
		DE_NULL,												// const void*							pNext;
		0,														// VkPipelineDynamicStateCreateFlags	flags;
		static_cast<deUint32>(m_dynamicStates.size() ),			// deUint32								dynamicStateCount;
		m_dynamicStates.data(),									// const VkDynamicState*				pDynamicStates;
	};

	const VkRayTracingPipelineCreateInfoKHR				pipelineCreateInfo				=
	{
		VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR,	//  VkStructureType								sType;
		DE_NULL,												//  const void*									pNext;
		m_pipelineCreateFlags,									//  VkPipelineCreateFlags						flags;
		de::sizeU32(m_shaderCreateInfos),						//  deUint32									stageCount;
		de::dataOrNull(m_shaderCreateInfos),					//  const VkPipelineShaderStageCreateInfo*		pStages;
		de::sizeU32(m_shadersGroupCreateInfos),					//  deUint32									groupCount;
		de::dataOrNull(m_shadersGroupCreateInfos),				//  const VkRayTracingShaderGroupCreateInfoKHR*	pGroups;
		m_maxRecursionDepth,									//  deUint32									maxRecursionDepth;
		librariesCreateInfoPtr,									//  VkPipelineLibraryCreateInfoKHR*				pLibraryInfo;
		pipelineInterfaceCreateInfoPtr,							//  VkRayTracingPipelineInterfaceCreateInfoKHR*	pLibraryInterface;
		&dynamicStateCreateInfo,								//  const VkPipelineDynamicStateCreateInfo*		pDynamicState;
		pipelineLayout,											//  VkPipelineLayout							layout;
		(VkPipeline)DE_NULL,									//  VkPipeline									basePipelineHandle;
		0,														//  deInt32										basePipelineIndex;
	};
	VkPipeline											object							= DE_NULL;
	VkResult											result							= vk.createRayTracingPipelinesKHR(device, deferredOperation.get(), pipelineCache, 1u, &pipelineCreateInfo, DE_NULL, &object);
	const bool											allowCompileRequired			= ((m_pipelineCreateFlags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) != 0);

	// Wait for the deferred operation to finish before inspecting the result.
	if (m_deferredOperation)
	{
		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS || (allowCompileRequired && result == VK_PIPELINE_COMPILE_REQUIRED));
		finishDeferredOperation(vk, device, deferredOperation.get(), m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
	}

	if (allowCompileRequired && result == VK_PIPELINE_COMPILE_REQUIRED)
		throw CompileRequiredError("createRayTracingPipelinesKHR returned VK_PIPELINE_COMPILE_REQUIRED");

	// Wrap the raw handle so it is destroyed with the returned Move<> object.
	Move<VkPipeline> pipeline (check<VkPipeline>(object), Deleter<VkPipeline>(vk, device, DE_NULL));
	return pipeline;
}
3525 
3526 
createPipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const std::vector<de::SharedPtr<Move<VkPipeline>>> & pipelineLibraries)3527 Move<VkPipeline> RayTracingPipeline::createPipeline (const DeviceInterface&									vk,
3528 													 const VkDevice											device,
3529 													 const VkPipelineLayout									pipelineLayout,
3530 													 const std::vector<de::SharedPtr<Move<VkPipeline>>>&	pipelineLibraries)
3531 {
3532 	std::vector<VkPipeline> rawPipelines;
3533 	rawPipelines.reserve(pipelineLibraries.size());
3534 	for (const auto& lib : pipelineLibraries)
3535 		rawPipelines.push_back(lib.get()->get());
3536 
3537 	return createPipelineKHR(vk, device, pipelineLayout, rawPipelines);
3538 }
3539 
// Thin public wrapper over the KHR-specific creation path; forwards all
// arguments unchanged (including the optional pipeline cache).
Move<VkPipeline> RayTracingPipeline::createPipeline (const DeviceInterface&			vk,
													 const VkDevice					device,
													 const VkPipelineLayout			pipelineLayout,
													 const std::vector<VkPipeline>&	pipelineLibraries,
													 const VkPipelineCache			pipelineCache)
{
	return createPipelineKHR(vk, device, pipelineLayout, pipelineLibraries, pipelineCache);
}
3548 
createPipelineWithLibraries(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout)3549 std::vector<de::SharedPtr<Move<VkPipeline>>> RayTracingPipeline::createPipelineWithLibraries (const DeviceInterface&			vk,
3550 																								const VkDevice					device,
3551 																								const VkPipelineLayout			pipelineLayout)
3552 {
3553 	for (size_t groupNdx = 0; groupNdx < m_shadersGroupCreateInfos.size(); ++groupNdx)
3554 		DE_ASSERT(m_shadersGroupCreateInfos[groupNdx].sType == VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR);
3555 
3556 	DE_ASSERT(m_shaderCreateInfos.size() > 0);
3557 	DE_ASSERT(m_shadersGroupCreateInfos.size() > 0);
3558 
3559 	std::vector<de::SharedPtr<Move<VkPipeline>>> result, allLibraries, firstLibraries;
3560 	for(auto it=begin(m_pipelineLibraries), eit=end(m_pipelineLibraries); it!=eit; ++it)
3561 	{
3562 		auto childLibraries = (*it)->get()->createPipelineWithLibraries(vk, device, pipelineLayout);
3563 		DE_ASSERT(childLibraries.size() > 0);
3564 		firstLibraries.push_back(childLibraries[0]);
3565 		std::copy(begin(childLibraries), end(childLibraries), std::back_inserter(allLibraries));
3566 	}
3567 	result.push_back(makeVkSharedPtr(createPipeline(vk, device, pipelineLayout, firstLibraries)));
3568 	std::copy(begin(allLibraries), end(allLibraries), std::back_inserter(result));
3569 	return result;
3570 }
3571 
// Creates and fills a host-visible shader binding table (SBT) buffer for 'pipeline'.
//
// Buffer layout: 'shaderBindingTableOffset' leading bytes, then 'groupCount'
// entries. Each entry holds one shader group handle followed by optional
// shader record data ('shaderRecordSize' bytes, copied from
// 'shaderGroupDataPtrPerGroup' when provided). With 'autoAlignRecords' the
// entry stride is rounded up to a multiple of 'shaderGroupHandleSize'.
// When 'opaqueCaptureAddress' is non-zero the buffer is created for
// capture/replay at that address.
de::MovePtr<BufferWithMemory> RayTracingPipeline::createShaderBindingTable (const DeviceInterface&		vk,
																			const VkDevice				device,
																			const VkPipeline			pipeline,
																			Allocator&					allocator,
																			const deUint32&				shaderGroupHandleSize,
																			const deUint32				shaderGroupBaseAlignment,
																			const deUint32&				firstGroup,
																			const deUint32&				groupCount,
																			const VkBufferCreateFlags&	additionalBufferCreateFlags,
																			const VkBufferUsageFlags&	additionalBufferUsageFlags,
																			const MemoryRequirement&	additionalMemoryRequirement,
																			const VkDeviceAddress&		opaqueCaptureAddress,
																			const deUint32				shaderBindingTableOffset,
																			const deUint32				shaderRecordSize,
																			const void**				shaderGroupDataPtrPerGroup,
																			const bool					autoAlignRecords)
{
	// The base alignment is only used for these checks; DE_UNREF silences the
	// unused-parameter warning in release (NDEBUG) builds.
	DE_ASSERT(shaderGroupBaseAlignment != 0u);
	DE_ASSERT((shaderBindingTableOffset % shaderGroupBaseAlignment) == 0);
	DE_UNREF(shaderGroupBaseAlignment);

	const auto								totalEntrySize					= (autoAlignRecords ? (deAlign32(shaderGroupHandleSize + shaderRecordSize, shaderGroupHandleSize)) : (shaderGroupHandleSize + shaderRecordSize));
	const deUint32							sbtSize							= shaderBindingTableOffset + groupCount * totalEntrySize;
	const VkBufferUsageFlags				sbtFlags						= VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | additionalBufferUsageFlags;
	VkBufferCreateInfo						sbtCreateInfo					= makeBufferCreateInfo(sbtSize, sbtFlags);
	sbtCreateInfo.flags														|= additionalBufferCreateFlags;
	VkBufferOpaqueCaptureAddressCreateInfo	sbtCaptureAddressInfo			=
	{
		VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO,	// VkStructureType	sType;
		DE_NULL,														// const void*		pNext;
		deUint64(opaqueCaptureAddress)									// deUint64			opaqueCaptureAddress;
	};

	// Opt the buffer into capture/replay only when the caller supplied an address.
	if (opaqueCaptureAddress != 0u)
	{
		sbtCreateInfo.pNext = &sbtCaptureAddressInfo;
		sbtCreateInfo.flags |= VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
	}
	const MemoryRequirement			sbtMemRequirements						= MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress | additionalMemoryRequirement;
	de::MovePtr<BufferWithMemory>	sbtBuffer								= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, sbtCreateInfo, sbtMemRequirements));
	vk::Allocation&					sbtAlloc								= sbtBuffer->getAllocation();

	// collect shader group handles
	std::vector<deUint8>			shaderHandles							(groupCount * shaderGroupHandleSize);
	VK_CHECK(getRayTracingShaderGroupHandles(vk, device, pipeline, firstGroup, groupCount, groupCount * shaderGroupHandleSize, shaderHandles.data()));

	// reserve place for ShaderRecordKHR after each shader handle ( ShaderRecordKHR size might be 0 ). Also take alignment into consideration
	deUint8* shaderBegin = (deUint8*)sbtAlloc.getHostPtr() + shaderBindingTableOffset;
	for (deUint32 idx = 0; idx < groupCount; ++idx)
	{
		deUint8* shaderSrcPos	= shaderHandles.data() + idx * shaderGroupHandleSize;
		deUint8* shaderDstPos	= shaderBegin + idx * totalEntrySize;
		deMemcpy(shaderDstPos, shaderSrcPos, shaderGroupHandleSize);

		// Append optional per-group shader record data right after the handle.
		if (shaderGroupDataPtrPerGroup		!= nullptr &&
			shaderGroupDataPtrPerGroup[idx] != nullptr)
		{
			DE_ASSERT(sbtSize >= static_cast<deUint32>(shaderDstPos - shaderBegin) + shaderGroupHandleSize);

			deMemcpy(	shaderDstPos + shaderGroupHandleSize,
						shaderGroupDataPtrPerGroup[idx],
						shaderRecordSize);
		}
	}

	// Memory is Coherent, but flush anyway so the write is visible regardless of the allocation type.
	flushMappedMemoryRange(vk, device, sbtAlloc.getMemory(), sbtAlloc.getOffset(), VK_WHOLE_SIZE);

	return sbtBuffer;
}
3641 
// Stores the VkPipelineCreateFlags to be used when the pipeline is created.
void RayTracingPipeline::setCreateFlags (const VkPipelineCreateFlags& pipelineCreateFlags)
{
	m_pipelineCreateFlags = pipelineCreateFlags;
}
3646 
// Stores the maximum ray recursion depth to be used for pipeline creation.
void RayTracingPipeline::setMaxRecursionDepth (const deUint32& maxRecursionDepth)
{
	m_maxRecursionDepth = maxRecursionDepth;
}
3651 
// Stores the maximum ray payload size to be used for pipeline creation.
void RayTracingPipeline::setMaxPayloadSize (const deUint32& maxPayloadSize)
{
	m_maxPayloadSize = maxPayloadSize;
}
3656 
// Stores the maximum hit attribute size to be used for pipeline creation.
void RayTracingPipeline::setMaxAttributeSize (const deUint32& maxAttributeSize)
{
	m_maxAttributeSize = maxAttributeSize;
}
3661 
// Stores whether pipeline creation should use a deferred operation and, if so,
// how many worker threads should drive it (0 presumably meaning no extra
// threads -- confirm against the creation path that reads these members).
void RayTracingPipeline::setDeferredOperation (const bool		deferredOperation,
											   const deUint32	workerThreadCount)
{
	m_deferredOperation = deferredOperation;
	m_workerThreadCount = workerThreadCount;
}
3668 
// Appends a dynamic state to be included in the pipeline's dynamic state list.
void RayTracingPipeline::addDynamicState(const VkDynamicState& dynamicState)
{
	m_dynamicStates.push_back(dynamicState);
}
3673 
// Concrete RayTracingProperties implementation backed by the
// VK_KHR_acceleration_structure and VK_KHR_ray_tracing_pipeline property
// structures queried from the physical device in the constructor.
class RayTracingPropertiesKHR : public RayTracingProperties
{
public:
							RayTracingPropertiesKHR						() = delete;
							RayTracingPropertiesKHR						(const InstanceInterface&	vki,
																		 const VkPhysicalDevice		physicalDevice);
	virtual					~RayTracingPropertiesKHR					();

	// Each getter forwards one field of the cached property structures below.
	uint32_t		getShaderGroupHandleSize					(void)	override { return m_rayTracingPipelineProperties.shaderGroupHandleSize;						}
	uint32_t		getShaderGroupHandleAlignment				(void)	override { return m_rayTracingPipelineProperties.shaderGroupHandleAlignment;				}
	uint32_t		getMaxRecursionDepth						(void)	override { return m_rayTracingPipelineProperties.maxRayRecursionDepth;						}
	uint32_t		getMaxShaderGroupStride						(void)	override { return m_rayTracingPipelineProperties.maxShaderGroupStride;						}
	uint32_t		getShaderGroupBaseAlignment					(void)	override { return m_rayTracingPipelineProperties.shaderGroupBaseAlignment;					}
	uint64_t		getMaxGeometryCount							(void)	override { return m_accelerationStructureProperties.maxGeometryCount;						}
	uint64_t		getMaxInstanceCount							(void)	override { return m_accelerationStructureProperties.maxInstanceCount;						}
	uint64_t		getMaxPrimitiveCount						(void)	override { return m_accelerationStructureProperties.maxPrimitiveCount;						}
	uint32_t		getMaxDescriptorSetAccelerationStructures	(void)	override { return m_accelerationStructureProperties.maxDescriptorSetAccelerationStructures;	}
	uint32_t		getMaxRayDispatchInvocationCount			(void)	override { return m_rayTracingPipelineProperties.maxRayDispatchInvocationCount;				}
	uint32_t		getMaxRayHitAttributeSize					(void)	override { return m_rayTracingPipelineProperties.maxRayHitAttributeSize;					}
	uint32_t		getMaxMemoryAllocationCount					(void)	override { return m_maxMemoryAllocationCount;												}

protected:
	// Filled once in the constructor; never modified afterwards.
	VkPhysicalDeviceAccelerationStructurePropertiesKHR	m_accelerationStructureProperties;
	VkPhysicalDeviceRayTracingPipelinePropertiesKHR		m_rayTracingPipelineProperties;
	deUint32											m_maxMemoryAllocationCount;	// core limit, not extension-specific
};
3700 
~RayTracingPropertiesKHR()3701 RayTracingPropertiesKHR::~RayTracingPropertiesKHR ()
3702 {
3703 }
3704 
// Queries and caches the extension property structures and the core
// maxMemoryAllocationCount limit for the given physical device.
RayTracingPropertiesKHR::RayTracingPropertiesKHR (const InstanceInterface&	vki,
												  const VkPhysicalDevice	physicalDevice)
	: RayTracingProperties	(vki, physicalDevice)
{
	// NOTE(review): getPhysicalDeviceExtensionProperties() appears to pick the
	// queried structure from the assignment target's type -- keep these as
	// assignments rather than moving them into the initializer list.
	m_accelerationStructureProperties	= getPhysicalDeviceExtensionProperties(vki, physicalDevice);
	m_rayTracingPipelineProperties		= getPhysicalDeviceExtensionProperties(vki, physicalDevice);
	m_maxMemoryAllocationCount			= getPhysicalDeviceProperties(vki, physicalDevice).limits.maxMemoryAllocationCount;
}
3713 
makeRayTracingProperties(const InstanceInterface & vki,const VkPhysicalDevice physicalDevice)3714 de::MovePtr<RayTracingProperties> makeRayTracingProperties (const InstanceInterface&	vki,
3715 															const VkPhysicalDevice		physicalDevice)
3716 {
3717 	return de::MovePtr<RayTracingProperties>(new RayTracingPropertiesKHR(vki, physicalDevice));
3718 }
3719 
cmdTraceRaysKHR(const DeviceInterface & vk,VkCommandBuffer commandBuffer,const VkStridedDeviceAddressRegionKHR * raygenShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * missShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * hitShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * callableShaderBindingTableRegion,deUint32 width,deUint32 height,deUint32 depth)3720 static inline void cmdTraceRaysKHR (const DeviceInterface&					vk,
3721 									VkCommandBuffer							commandBuffer,
3722 									const VkStridedDeviceAddressRegionKHR*	raygenShaderBindingTableRegion,
3723 									const VkStridedDeviceAddressRegionKHR*	missShaderBindingTableRegion,
3724 									const VkStridedDeviceAddressRegionKHR*	hitShaderBindingTableRegion,
3725 									const VkStridedDeviceAddressRegionKHR*	callableShaderBindingTableRegion,
3726 									deUint32								width,
3727 									deUint32								height,
3728 									deUint32								depth)
3729 {
3730 	return vk.cmdTraceRaysKHR(commandBuffer,
3731 							  raygenShaderBindingTableRegion,
3732 							  missShaderBindingTableRegion,
3733 							  hitShaderBindingTableRegion,
3734 							  callableShaderBindingTableRegion,
3735 							  width,
3736 							  height,
3737 							  depth);
3738 }
3739 
3740 
cmdTraceRays(const DeviceInterface & vk,VkCommandBuffer commandBuffer,const VkStridedDeviceAddressRegionKHR * raygenShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * missShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * hitShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * callableShaderBindingTableRegion,deUint32 width,deUint32 height,deUint32 depth)3741 void cmdTraceRays (const DeviceInterface&					vk,
3742 				   VkCommandBuffer							commandBuffer,
3743 				   const VkStridedDeviceAddressRegionKHR*	raygenShaderBindingTableRegion,
3744 				   const VkStridedDeviceAddressRegionKHR*	missShaderBindingTableRegion,
3745 				   const VkStridedDeviceAddressRegionKHR*	hitShaderBindingTableRegion,
3746 				   const VkStridedDeviceAddressRegionKHR*	callableShaderBindingTableRegion,
3747 				   deUint32									width,
3748 				   deUint32									height,
3749 				   deUint32									depth)
3750 {
3751 	DE_ASSERT(raygenShaderBindingTableRegion	!= DE_NULL);
3752 	DE_ASSERT(missShaderBindingTableRegion		!= DE_NULL);
3753 	DE_ASSERT(hitShaderBindingTableRegion		!= DE_NULL);
3754 	DE_ASSERT(callableShaderBindingTableRegion	!= DE_NULL);
3755 
3756 	return cmdTraceRaysKHR(vk,
3757 						   commandBuffer,
3758 						   raygenShaderBindingTableRegion,
3759 						   missShaderBindingTableRegion,
3760 						   hitShaderBindingTableRegion,
3761 						   callableShaderBindingTableRegion,
3762 						   width,
3763 						   height,
3764 						   depth);
3765 }
3766 
cmdTraceRaysIndirectKHR(const DeviceInterface & vk,VkCommandBuffer commandBuffer,const VkStridedDeviceAddressRegionKHR * raygenShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * missShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * hitShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * callableShaderBindingTableRegion,VkDeviceAddress indirectDeviceAddress)3767 static inline void cmdTraceRaysIndirectKHR (const DeviceInterface&					vk,
3768 											VkCommandBuffer							commandBuffer,
3769 											const VkStridedDeviceAddressRegionKHR*	raygenShaderBindingTableRegion,
3770 											const VkStridedDeviceAddressRegionKHR*	missShaderBindingTableRegion,
3771 											const VkStridedDeviceAddressRegionKHR*	hitShaderBindingTableRegion,
3772 											const VkStridedDeviceAddressRegionKHR*	callableShaderBindingTableRegion,
3773 											VkDeviceAddress							indirectDeviceAddress )
3774 {
3775 	DE_ASSERT(raygenShaderBindingTableRegion	!= DE_NULL);
3776 	DE_ASSERT(missShaderBindingTableRegion		!= DE_NULL);
3777 	DE_ASSERT(hitShaderBindingTableRegion		!= DE_NULL);
3778 	DE_ASSERT(callableShaderBindingTableRegion	!= DE_NULL);
3779 	DE_ASSERT(indirectDeviceAddress				!= 0);
3780 
3781 	return vk.cmdTraceRaysIndirectKHR(commandBuffer,
3782 									  raygenShaderBindingTableRegion,
3783 									  missShaderBindingTableRegion,
3784 									  hitShaderBindingTableRegion,
3785 									  callableShaderBindingTableRegion,
3786 									  indirectDeviceAddress);
3787 }
3788 
cmdTraceRaysIndirect(const DeviceInterface & vk,VkCommandBuffer commandBuffer,const VkStridedDeviceAddressRegionKHR * raygenShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * missShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * hitShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * callableShaderBindingTableRegion,VkDeviceAddress indirectDeviceAddress)3789 void cmdTraceRaysIndirect (const DeviceInterface&					vk,
3790 						   VkCommandBuffer							commandBuffer,
3791 						   const VkStridedDeviceAddressRegionKHR*	raygenShaderBindingTableRegion,
3792 						   const VkStridedDeviceAddressRegionKHR*	missShaderBindingTableRegion,
3793 						   const VkStridedDeviceAddressRegionKHR*	hitShaderBindingTableRegion,
3794 						   const VkStridedDeviceAddressRegionKHR*	callableShaderBindingTableRegion,
3795 						   VkDeviceAddress							indirectDeviceAddress)
3796 {
3797 	return cmdTraceRaysIndirectKHR(vk,
3798 								   commandBuffer,
3799 								   raygenShaderBindingTableRegion,
3800 								   missShaderBindingTableRegion,
3801 								   hitShaderBindingTableRegion,
3802 								   callableShaderBindingTableRegion,
3803 								   indirectDeviceAddress);
3804 }
3805 
cmdTraceRaysIndirect2KHR(const DeviceInterface & vk,VkCommandBuffer commandBuffer,VkDeviceAddress indirectDeviceAddress)3806 static inline void cmdTraceRaysIndirect2KHR (const DeviceInterface&	vk,
3807 											VkCommandBuffer			commandBuffer,
3808 											VkDeviceAddress			indirectDeviceAddress )
3809 {
3810 	DE_ASSERT(indirectDeviceAddress != 0);
3811 
3812 	return vk.cmdTraceRaysIndirect2KHR(commandBuffer, indirectDeviceAddress);
3813 }
3814 
cmdTraceRaysIndirect2(const DeviceInterface & vk,VkCommandBuffer commandBuffer,VkDeviceAddress indirectDeviceAddress)3815 void cmdTraceRaysIndirect2	(const DeviceInterface&	vk,
3816 							 VkCommandBuffer		commandBuffer,
3817 							 VkDeviceAddress		indirectDeviceAddress)
3818 {
3819 	return cmdTraceRaysIndirect2KHR(vk, commandBuffer, indirectDeviceAddress);
3820 }
3821 
3822 #else
3823 
3824 deUint32 rayTracingDefineAnything()
3825 {
3826 	return 0;
3827 }
3828 
3829 #endif // CTS_USES_VULKANSC
3830 
3831 } // vk
3832