• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*-------------------------------------------------------------------------
2  * Vulkan CTS Framework
3  * --------------------
4  *
5  * Copyright (c) 2020 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Utilities for creating commonly used Vulkan objects
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vkRayTracingUtil.hpp"
25 
26 #include "vkRefUtil.hpp"
27 #include "vkQueryUtil.hpp"
28 #include "vkObjUtil.hpp"
29 #include "vkBarrierUtil.hpp"
30 #include "vkCmdUtil.hpp"
31 
32 #include "deStringUtil.hpp"
33 #include "deSTLUtil.hpp"
34 
#include <algorithm>
#include <limits>
#include <map>
#include <string>
#include <thread>
#include <type_traits>
#include <vector>
41 
42 namespace vk
43 {
44 
45 #ifndef CTS_USES_VULKANSC
46 
static const deUint32 WATCHDOG_INTERVAL = 16384; // Touch watchDog every N iterations.

// Parameter block handed to each worker thread that joins a deferred host
// operation; 'result' receives the VkResult observed by that thread.
struct DeferredThreadParams
{
	const DeviceInterface&	vk;					// Device driver interface used for the join calls.
	VkDevice				device;				// Device that owns the deferred operation.
	VkDeferredOperationKHR	deferredOperation;	// Operation being joined.
	VkResult				result;				// Per-thread outcome, written by the worker.
};
56 
getFormatSimpleName(vk::VkFormat format)57 std::string getFormatSimpleName (vk::VkFormat format)
58 {
59 	constexpr size_t kPrefixLen = 10; // strlen("VK_FORMAT_")
60 	return de::toLower(de::toString(format).substr(kPrefixLen));
61 }
62 
pointInTriangle2D(const tcu::Vec3 & p,const tcu::Vec3 & p0,const tcu::Vec3 & p1,const tcu::Vec3 & p2)63 bool pointInTriangle2D(const tcu::Vec3& p, const tcu::Vec3& p0, const tcu::Vec3& p1, const tcu::Vec3& p2)
64 {
65 	float s = p0.y() * p2.x() - p0.x() * p2.y() + (p2.y() - p0.y()) * p.x() + (p0.x() - p2.x()) * p.y();
66 	float t = p0.x() * p1.y() - p0.y() * p1.x() + (p0.y() - p1.y()) * p.x() + (p1.x() - p0.x()) * p.y();
67 
68 	if ((s < 0) != (t < 0))
69 		return false;
70 
71 	float a = -p1.y() * p2.x() + p0.y() * (p2.x() - p1.x()) + p0.x() * (p1.y() - p2.y()) + p1.x() * p2.y();
72 
73 	return a < 0 ?
74 		(s <= 0 && s + t >= a) :
75 		(s >= 0 && s + t <= a);
76 }
77 
78 // Returns true if VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR needs to be supported for the given format.
isMandatoryAccelerationStructureVertexBufferFormat(vk::VkFormat format)79 static bool isMandatoryAccelerationStructureVertexBufferFormat (vk::VkFormat format)
80 {
81 	bool mandatory = false;
82 
83 	switch (format)
84 	{
85     case VK_FORMAT_R32G32_SFLOAT:
86     case VK_FORMAT_R32G32B32_SFLOAT:
87     case VK_FORMAT_R16G16_SFLOAT:
88     case VK_FORMAT_R16G16B16A16_SFLOAT:
89     case VK_FORMAT_R16G16_SNORM:
90     case VK_FORMAT_R16G16B16A16_SNORM:
91 		mandatory = true;
92 		break;
93 	default:
94 		break;
95 	}
96 
97 	return mandatory;
98 }
99 
checkAccelerationStructureVertexBufferFormat(const vk::InstanceInterface & vki,vk::VkPhysicalDevice physicalDevice,vk::VkFormat format)100 void checkAccelerationStructureVertexBufferFormat (const vk::InstanceInterface &vki, vk::VkPhysicalDevice physicalDevice, vk::VkFormat format)
101 {
102 	const vk::VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(vki, physicalDevice, format);
103 
104 	if ((formatProperties.bufferFeatures & vk::VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR) == 0u)
105 	{
106 		const std::string errorMsg = "Format not supported for acceleration structure vertex buffers";
107 		if (isMandatoryAccelerationStructureVertexBufferFormat(format))
108 			TCU_FAIL(errorMsg);
109 		TCU_THROW(NotSupportedError, errorMsg);
110 	}
111 }
112 
// GLSL source for a ray generation shader shared by many tests: shoots one
// ray per launch ID straight down -Z from the pixel center, against the
// acceleration structure bound at set 0, binding 1.
std::string getCommonRayGenerationShader (void)
{
	const std::string source =
		"#version 460 core\n"
		"#extension GL_EXT_ray_tracing : require\n"
		"layout(location = 0) rayPayloadEXT vec3 hitValue;\n"
		"layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n"
		"\n"
		"void main()\n"
		"{\n"
		"  uint  rayFlags = 0;\n"
		"  uint  cullMask = 0xFF;\n"
		"  float tmin     = 0.0;\n"
		"  float tmax     = 9.0;\n"
		"  vec3  origin   = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
		"  vec3  direct   = vec3(0.0, 0.0, -1.0);\n"
		"  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, 0);\n"
		"}\n";

	return source;
}
132 
// Construct a base geometry description. Geometry flags start empty and the
// opacity micromap is absent until explicitly attached.
RaytracedGeometryBase::RaytracedGeometryBase (VkGeometryTypeKHR geometryType, VkFormat vertexFormat, VkIndexType indexType)
	: m_geometryType	(geometryType)
	, m_vertexFormat	(vertexFormat)
	, m_indexType		(indexType)
	, m_geometryFlags	((VkGeometryFlagsKHR)0u)
	, m_hasOpacityMicromap (false)
{
	// AABB geometry data is pairs of vec3 corners, hence the fixed format.
	if (m_geometryType == VK_GEOMETRY_TYPE_AABBS_KHR)
		DE_ASSERT(m_vertexFormat == VK_FORMAT_R32G32B32_SFLOAT);
}
143 
// Out-of-line destructor; the base class owns no resources needing cleanup.
RaytracedGeometryBase::~RaytracedGeometryBase ()
{
}
147 
// Parameters selecting which concrete RaytracedGeometry specialization to build.
struct GeometryBuilderParams
{
	VkGeometryTypeKHR	geometryType;	// Geometry type forwarded to the constructor.
	bool				usePadding;		// Translated into padding amount 1 (padded) or 0.
};

// Instantiate RaytracedGeometry<V, I> from the builder parameters.
// The caller takes ownership of the returned object.
template <typename V, typename I>
RaytracedGeometryBase* buildRaytracedGeometry (const GeometryBuilderParams& params)
{
	return new RaytracedGeometry<V, I>(params.geometryType, (params.usePadding ? 1u : 0u));
}
159 
makeRaytracedGeometry(VkGeometryTypeKHR geometryType,VkFormat vertexFormat,VkIndexType indexType,bool padVertices)160 de::SharedPtr<RaytracedGeometryBase> makeRaytracedGeometry (VkGeometryTypeKHR geometryType, VkFormat vertexFormat, VkIndexType indexType, bool padVertices)
161 {
162 	const GeometryBuilderParams builderParams { geometryType, padVertices };
163 
164 	switch (vertexFormat)
165 	{
166 		case VK_FORMAT_R32G32_SFLOAT:
167 			switch (indexType)
168 			{
169 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, deUint16>(builderParams));
170 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, deUint32>(builderParams));
171 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, EmptyIndex>(builderParams));
172 				default:						TCU_THROW(InternalError, "Wrong index type");
173 			}
174 		case VK_FORMAT_R32G32B32_SFLOAT:
175 			switch (indexType)
176 			{
177 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, deUint16>(builderParams));
178 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, deUint32>(builderParams));
179 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, EmptyIndex>(builderParams));
180 				default:						TCU_THROW(InternalError, "Wrong index type");
181 			}
182 		case VK_FORMAT_R32G32B32A32_SFLOAT:
183 			switch (indexType)
184 			{
185 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, deUint16>(builderParams));
186 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, deUint32>(builderParams));
187 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, EmptyIndex>(builderParams));
188 				default:						TCU_THROW(InternalError, "Wrong index type");
189 			}
190 		case VK_FORMAT_R16G16_SFLOAT:
191 			switch (indexType)
192 			{
193 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, deUint16>(builderParams));
194 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, deUint32>(builderParams));
195 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, EmptyIndex>(builderParams));
196 				default:						TCU_THROW(InternalError, "Wrong index type");
197 			}
198 		case VK_FORMAT_R16G16B16_SFLOAT:
199 			switch (indexType)
200 			{
201 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, deUint16>(builderParams));
202 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, deUint32>(builderParams));
203 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, EmptyIndex>(builderParams));
204 				default:						TCU_THROW(InternalError, "Wrong index type");
205 			}
206 		case VK_FORMAT_R16G16B16A16_SFLOAT:
207 			switch (indexType)
208 			{
209 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, deUint16>(builderParams));
210 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, deUint32>(builderParams));
211 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, EmptyIndex>(builderParams));
212 				default:						TCU_THROW(InternalError, "Wrong index type");
213 			}
214 		case VK_FORMAT_R16G16_SNORM:
215 			switch (indexType)
216 			{
217 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, deUint16>(builderParams));
218 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, deUint32>(builderParams));
219 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, EmptyIndex>(builderParams));
220 				default:						TCU_THROW(InternalError, "Wrong index type");
221 			}
222 		case VK_FORMAT_R16G16B16_SNORM:
223 			switch (indexType)
224 			{
225 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, deUint16>(builderParams));
226 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, deUint32>(builderParams));
227 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, EmptyIndex>(builderParams));
228 				default:						TCU_THROW(InternalError, "Wrong index type");
229 			}
230 		case VK_FORMAT_R16G16B16A16_SNORM:
231 			switch (indexType)
232 			{
233 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, deUint16>(builderParams));
234 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, deUint32>(builderParams));
235 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, EmptyIndex>(builderParams));
236 				default:						TCU_THROW(InternalError, "Wrong index type");
237 			}
238 		case VK_FORMAT_R64G64_SFLOAT:
239 			switch (indexType)
240 			{
241 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, deUint16>(builderParams));
242 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, deUint32>(builderParams));
243 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, EmptyIndex>(builderParams));
244 				default:						TCU_THROW(InternalError, "Wrong index type");
245 			}
246 		case VK_FORMAT_R64G64B64_SFLOAT:
247 			switch (indexType)
248 			{
249 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, deUint16>(builderParams));
250 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, deUint32>(builderParams));
251 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, EmptyIndex>(builderParams));
252 				default:						TCU_THROW(InternalError, "Wrong index type");
253 			}
254 		case VK_FORMAT_R64G64B64A64_SFLOAT:
255 			switch (indexType)
256 			{
257 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, deUint16>(builderParams));
258 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, deUint32>(builderParams));
259 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, EmptyIndex>(builderParams));
260 				default:						TCU_THROW(InternalError, "Wrong index type");
261 			}
262 		case VK_FORMAT_R8G8_SNORM:
263 			switch (indexType)
264 			{
265 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, deUint16>(builderParams));
266 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, deUint32>(builderParams));
267 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, EmptyIndex>(builderParams));
268 				default:						TCU_THROW(InternalError, "Wrong index type");
269 			}
270 		case VK_FORMAT_R8G8B8_SNORM:
271 			switch (indexType)
272 			{
273 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, deUint16>(builderParams));
274 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, deUint32>(builderParams));
275 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, EmptyIndex>(builderParams));
276 				default:						TCU_THROW(InternalError, "Wrong index type");
277 			}
278 		case VK_FORMAT_R8G8B8A8_SNORM:
279 			switch (indexType)
280 			{
281 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, deUint16>(builderParams));
282 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, deUint32>(builderParams));
283 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, EmptyIndex>(builderParams));
284 				default:						TCU_THROW(InternalError, "Wrong index type");
285 			}
286 		default:
287 			TCU_THROW(InternalError, "Wrong vertex format");
288 	}
289 
290 }
291 
// Query the device address of 'buffer' and add 'offset' to it.
// A null buffer yields address 0 so optional buffers can be passed through.
VkDeviceAddress getBufferDeviceAddress ( const DeviceInterface&	vk,
										 const VkDevice			device,
										 const VkBuffer			buffer,
										 VkDeviceSize			offset )
{

	if (buffer == DE_NULL)
		return 0;

	VkBufferDeviceAddressInfo deviceAddressInfo
	{
		VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,		// VkStructureType    sType
		DE_NULL,											// const void*        pNext
		buffer												// VkBuffer           buffer;
	};
	return vk.getBufferDeviceAddress(device, &deviceAddressInfo) + offset;
}
309 
310 
// Create a query pool with 'queryCount' queries of the given type.
// pipelineStatistics is zero because it only applies to pipeline-statistics queries.
static inline Move<VkQueryPool> makeQueryPool (const DeviceInterface&		vk,
											   const VkDevice				device,
											   const VkQueryType			queryType,
											   deUint32					queryCount)
{
	const VkQueryPoolCreateInfo				queryPoolCreateInfo =
	{
		VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,		// sType
		DE_NULL,										// pNext
		(VkQueryPoolCreateFlags)0,						// flags
		queryType,										// queryType
		queryCount,										// queryCount
		0u,												// pipelineStatistics
	};
	return createQueryPool(vk, device, &queryPoolCreateInfo);
}
327 
makeVkAccelerationStructureGeometryDataKHR(const VkAccelerationStructureGeometryTrianglesDataKHR & triangles)328 static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureGeometryDataKHR (const VkAccelerationStructureGeometryTrianglesDataKHR& triangles)
329 {
330 	VkAccelerationStructureGeometryDataKHR result;
331 
332 	deMemset(&result, 0, sizeof(result));
333 
334 	result.triangles = triangles;
335 
336 	return result;
337 }
338 
makeVkAccelerationStructureGeometryDataKHR(const VkAccelerationStructureGeometryAabbsDataKHR & aabbs)339 static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureGeometryDataKHR (const VkAccelerationStructureGeometryAabbsDataKHR& aabbs)
340 {
341 	VkAccelerationStructureGeometryDataKHR result;
342 
343 	deMemset(&result, 0, sizeof(result));
344 
345 	result.aabbs = aabbs;
346 
347 	return result;
348 }
349 
makeVkAccelerationStructureInstancesDataKHR(const VkAccelerationStructureGeometryInstancesDataKHR & instances)350 static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureInstancesDataKHR (const VkAccelerationStructureGeometryInstancesDataKHR& instances)
351 {
352 	VkAccelerationStructureGeometryDataKHR result;
353 
354 	deMemset(&result, 0, sizeof(result));
355 
356 	result.instances = instances;
357 
358 	return result;
359 }
360 
// Build a top-level instance descriptor. The Vulkan struct packs
// instanceCustomIndex/mask and instanceShaderBindingTableRecordOffset/flags
// into 24:8-bit bitfields, so inputs are masked to their field widths
// before assignment.
static inline VkAccelerationStructureInstanceKHR makeVkAccelerationStructureInstanceKHR (const VkTransformMatrixKHR&			transform,
																						 deUint32								instanceCustomIndex,
																						 deUint32								mask,
																						 deUint32								instanceShaderBindingTableRecordOffset,
																						 VkGeometryInstanceFlagsKHR				flags,
																						 deUint64								accelerationStructureReference)
{
	VkAccelerationStructureInstanceKHR instance		= { transform, 0, 0, 0, 0, accelerationStructureReference };
	instance.instanceCustomIndex					= instanceCustomIndex & 0xFFFFFF;
	instance.mask									= mask & 0xFF;
	instance.instanceShaderBindingTableRecordOffset	= instanceShaderBindingTableRecordOffset & 0xFFFFFF;
	instance.flags									= flags & 0xFF;
	return instance;
}
375 
// Thin wrapper over vkGetRayTracingShaderGroupHandlesKHR; copies the
// opaque handles of [firstGroup, firstGroup+groupCount) into pData.
VkResult getRayTracingShaderGroupHandlesKHR (const DeviceInterface&		vk,
											 const VkDevice				device,
											 const VkPipeline			pipeline,
											 const deUint32				firstGroup,
											 const deUint32				groupCount,
											 const deUintptr			dataSize,
											 void*						pData)
{
	return vk.getRayTracingShaderGroupHandlesKHR(device, pipeline, firstGroup, groupCount, dataSize, pData);
}
386 
// Extension-neutral entry point; currently forwards to the KHR variant.
VkResult getRayTracingShaderGroupHandles (const DeviceInterface&		vk,
										  const VkDevice				device,
										  const VkPipeline				pipeline,
										  const deUint32				firstGroup,
										  const deUint32				groupCount,
										  const deUintptr				dataSize,
										  void*							pData)
{
	return getRayTracingShaderGroupHandlesKHR(vk, device, pipeline, firstGroup, groupCount, dataSize, pData);
}
397 
// Join the calling thread to a deferred operation and wait (yielding
// between attempts) until it completes, then return the operation's
// own result as reported by vkGetDeferredOperationResultKHR.
VkResult finishDeferredOperation (const DeviceInterface&	vk,
								  VkDevice					device,
								  VkDeferredOperationKHR	deferredOperation)
{
	VkResult result = vk.deferredOperationJoinKHR(device, deferredOperation);

	// VK_THREAD_IDLE_KHR: no work for this thread right now, but the
	// operation is not complete either -- yield and rejoin.
	while (result == VK_THREAD_IDLE_KHR)
	{
		std::this_thread::yield();
		result = vk.deferredOperationJoinKHR(device, deferredOperation);
	}

	switch( result )
	{
		case VK_SUCCESS:
		{
			// Deferred operation has finished. Query its result
			result = vk.getDeferredOperationResultKHR(device, deferredOperation);

			break;
		}

		case VK_THREAD_DONE_KHR:
		{
			// Deferred operation is being wrapped up by another thread
			// wait for that thread to finish
			do
			{
				std::this_thread::yield();
				result = vk.getDeferredOperationResultKHR(device, deferredOperation);
			} while (result == VK_NOT_READY);

			break;
		}

		default:
		{
			// deferredOperationJoinKHR must only return one of the values above.
			DE_ASSERT(false);

			break;
		}
	}

	return result;
}
443 
// Worker-thread entry point: join the deferred operation and store the
// observed result back into the shared parameter block.
void finishDeferredOperationThreaded (DeferredThreadParams* deferredThreadParams)
{
	deferredThreadParams->result = finishDeferredOperation(deferredThreadParams->vk, deferredThreadParams->device, deferredThreadParams->deferredOperation);
}
448 
// Complete a deferred operation, optionally joining it from several threads.
//  - operationNotDeferred: the driver reported VK_OPERATION_NOT_DEFERRED_KHR,
//    so the result is already available and no joins are required.
//  - workerThreadCount == 0: finish on the calling thread.
//  - workerThreadCount == UINT32_MAX: use the implementation's reported
//    maximum concurrency, capped at 256 threads.
// The operation counts as finished if at least one thread observes VK_SUCCESS;
// otherwise the test fails.
void finishDeferredOperation (const DeviceInterface&	vk,
							  VkDevice					device,
							  VkDeferredOperationKHR	deferredOperation,
							  const deUint32			workerThreadCount,
							  const bool				operationNotDeferred)
{

	if (operationNotDeferred)
	{
		// when the operation deferral returns VK_OPERATION_NOT_DEFERRED_KHR,
		// the deferred operation should act as if no command was deferred
		VK_CHECK(vk.getDeferredOperationResultKHR(device, deferredOperation));


		// there is no need to join any threads to the deferred operation,
		// so the code below can be skipped.
		return;
	}

	if (workerThreadCount == 0)
	{
		VK_CHECK(finishDeferredOperation(vk, device, deferredOperation));
	}
	else
	{
		const deUint32							maxThreadCountSupported	= deMinu32(256u, vk.getDeferredOperationMaxConcurrencyKHR(device, deferredOperation));
		const deUint32							requestedThreadCount	= workerThreadCount;
		const deUint32							testThreadCount			= requestedThreadCount == std::numeric_limits<deUint32>::max() ? maxThreadCountSupported : requestedThreadCount;

		if (maxThreadCountSupported == 0)
			TCU_FAIL("vkGetDeferredOperationMaxConcurrencyKHR must not return 0");

		const DeferredThreadParams				deferredThreadParams	=
		{
			vk,					//  const DeviceInterface&	vk;
			device,				//  VkDevice				device;
			deferredOperation,	//  VkDeferredOperationKHR	deferredOperation;
			VK_RESULT_MAX_ENUM,	//  VkResult				result;  (sentinel: no result yet)
		};
		std::vector<DeferredThreadParams>		threadParams	(testThreadCount, deferredThreadParams);
		std::vector<de::MovePtr<std::thread> >	threads			(testThreadCount);
		bool									executionResult	= false;

		DE_ASSERT(threads.size() > 0 && threads.size() == testThreadCount);

		// Launch one joining worker per requested thread, then wait for all.
		for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
			threads[threadNdx] = de::MovePtr<std::thread>(new std::thread(finishDeferredOperationThreaded, &threadParams[threadNdx]));

		for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
			threads[threadNdx]->join();

		// Success if any worker saw VK_SUCCESS from the deferred operation.
		for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
			if (threadParams[threadNdx].result == VK_SUCCESS)
				executionResult = true;

		if (!executionResult)
			TCU_FAIL("Neither reported VK_SUCCESS");
	}
}
508 
// Allocate a host-visible buffer of 'storageSize' bytes used to hold a
// serialized acceleration structure. Cached memory is preferred for read
// performance but optional, so allocation is retried without it.
SerialStorage::SerialStorage (const DeviceInterface&									vk,
							  const VkDevice											device,
							  Allocator&												allocator,
							  const VkAccelerationStructureBuildTypeKHR					buildType,
							  const VkDeviceSize										storageSize)
	: m_buildType		(buildType)
	, m_storageSize		(storageSize)
	, m_serialInfo		()
{
	const VkBufferCreateInfo	bufferCreateInfo	= makeBufferCreateInfo(storageSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
	try
	{
		m_buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
	}
	catch (const tcu::NotSupportedError&)
	{
		// retry without Cached flag
		m_buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
	}
}
529 
SerialStorage(const DeviceInterface & vk,const VkDevice device,Allocator & allocator,const VkAccelerationStructureBuildTypeKHR buildType,const SerialInfo & serialInfo)530 SerialStorage::SerialStorage (const DeviceInterface&						vk,
531 							  const VkDevice								device,
532 							  Allocator&									allocator,
533 							  const VkAccelerationStructureBuildTypeKHR		buildType,
534 							  const SerialInfo&								serialInfo)
535 	: m_buildType		(buildType)
536 	, m_storageSize		(serialInfo.sizes()[0])	// raise assertion if serialInfo is empty
537 	, m_serialInfo		(serialInfo)
538 {
539 	DE_ASSERT(serialInfo.sizes().size() >= 2u);
540 
541 	// create buffer for top-level acceleration structure
542 	{
543 		const VkBufferCreateInfo	bufferCreateInfo	= makeBufferCreateInfo(m_storageSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
544 		m_buffer										= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
545 	}
546 
547 	// create buffers for bottom-level acceleration structures
548 	{
549 		std::vector<deUint64>	addrs;
550 
551 		for (std::size_t i = 1; i < serialInfo.addresses().size(); ++i)
552 		{
553 			const deUint64& lookAddr = serialInfo.addresses()[i];
554 			auto end = addrs.end();
555 			auto match = std::find_if(addrs.begin(), end, [&](const deUint64& item){ return item == lookAddr; });
556 			if (match == end)
557 			{
558 				addrs.emplace_back(lookAddr);
559 				m_bottoms.emplace_back(de::SharedPtr<SerialStorage>(new SerialStorage(vk, device, allocator, buildType, serialInfo.sizes()[i])));
560 			}
561 		}
562 	}
563 }
564 
// Return the storage address in the form matching the build type:
// a device address for device builds, a host pointer for host builds.
VkDeviceOrHostAddressKHR SerialStorage::getAddress (const DeviceInterface&						vk,
													const VkDevice								device,
													const VkAccelerationStructureBuildTypeKHR	buildType)
{
	if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
		return makeDeviceOrHostAddressKHR(vk, device, m_buffer->get(), 0);
	else
		return makeDeviceOrHostAddressKHR(m_buffer->getAllocation().getHostPtr());
}
574 
// View the start of the host-visible storage as a serialized
// acceleration structure header.
SerialStorage::AccelerationStructureHeader* SerialStorage::getASHeader ()
{
	return reinterpret_cast<AccelerationStructureHeader*>(getHostAddress().hostAddress);
}
579 
// True when the serial info also covers bottom-level structures
// (more than just the top-level size is recorded).
bool SerialStorage::hasDeepFormat () const
{
	return (m_serialInfo.sizes().size() >= 2u);
}
584 
getBottomStorage(deUint32 index) const585 de::SharedPtr<SerialStorage> SerialStorage::getBottomStorage (deUint32 index) const
586 {
587 	return m_bottoms[index];
588 }
589 
// Host pointer into the storage buffer at 'offset' bytes from the start.
VkDeviceOrHostAddressKHR SerialStorage::getHostAddress (VkDeviceSize offset)
{
	DE_ASSERT(offset < m_storageSize);
	return makeDeviceOrHostAddressKHR(static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr()) + offset);
}
595 
// Const host pointer into the storage buffer at 'offset' bytes.
// NOTE(review): unlike getHostAddress(), this has no offset bounds
// assertion -- confirm whether that asymmetry is intentional.
VkDeviceOrHostAddressConstKHR SerialStorage::getHostAddressConst (VkDeviceSize offset)
{
	return makeDeviceOrHostAddressConstKHR(static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr()) + offset);
}
600 
// Const counterpart of getAddress(): device address for device builds,
// host pointer for host builds.
VkDeviceOrHostAddressConstKHR SerialStorage::getAddressConst (const DeviceInterface&					vk,
															  const VkDevice							device,
															  const VkAccelerationStructureBuildTypeKHR	buildType)
{
	if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
		return makeDeviceOrHostAddressConstKHR(vk, device, m_buffer->get(), 0);
	else
		return getHostAddressConst();
}
610 
// Size in bytes of the serialization storage buffer.
inline VkDeviceSize SerialStorage::getStorageSize () const
{
	return m_storageSize;
}
615 
// Serialization metadata this storage was created from (empty for the
// size-only constructor).
inline const SerialInfo& SerialStorage::getSerialInfo () const
{
	return m_serialInfo;
}
620 
// Read the deserialized-size field out of the serialized acceleration
// structure data held in the host-visible buffer.
deUint64 SerialStorage::getDeserializedSize ()
{
	deUint64		result		= 0;
	const deUint8*	startPtr	= static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr());

	// The header field width must match the variable we memcpy into.
	DE_ASSERT(sizeof(result) == DESERIALIZED_SIZE_SIZE);

	deMemcpy(&result, startPtr + DESERIALIZED_SIZE_OFFSET, sizeof(result));

	return result;
}
632 
// Out-of-line destructor; nothing owned directly by this base class.
BottomLevelAccelerationStructure::~BottomLevelAccelerationStructure ()
{
}
636 
// Structure and scratch sizes start at zero; they are filled in later
// once the build sizes are queried.
BottomLevelAccelerationStructure::BottomLevelAccelerationStructure ()
	: m_structureSize		(0u)
	, m_updateScratchSize	(0u)
	, m_buildScratchSize	(0u)
{
}
643 
// Replace all stored geometry with a single geometry built from
// 'geometryData': vertex triplets for triangles, (min,max) corner pairs
// for AABBs.
void BottomLevelAccelerationStructure::setGeometryData (const std::vector<tcu::Vec3>&	geometryData,
														const bool						triangles,
														const VkGeometryFlagsKHR		geometryFlags)
{
	if (triangles)
		DE_ASSERT((geometryData.size() % 3) == 0);
	else
		DE_ASSERT((geometryData.size() % 2) == 0);

	setGeometryCount(1u);

	addGeometry(geometryData, triangles, geometryFlags);
}
657 
setDefaultGeometryData(const VkShaderStageFlagBits testStage,const VkGeometryFlagsKHR geometryFlags)658 void BottomLevelAccelerationStructure::setDefaultGeometryData (const VkShaderStageFlagBits	testStage,
659 															   const VkGeometryFlagsKHR		geometryFlags)
660 {
661 	bool					trianglesData	= false;
662 	float					z				= 0.0f;
663 	std::vector<tcu::Vec3>	geometryData;
664 
665 	switch (testStage)
666 	{
667 		case VK_SHADER_STAGE_RAYGEN_BIT_KHR:		z = -1.0f; trianglesData = true;	break;
668 		case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:		z = -1.0f; trianglesData = true;	break;
669 		case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:	z = -1.0f; trianglesData = true;	break;
670 		case VK_SHADER_STAGE_MISS_BIT_KHR:			z = -9.9f; trianglesData = true;	break;
671 		case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:	z = -1.0f; trianglesData = false;	break;
672 		case VK_SHADER_STAGE_CALLABLE_BIT_KHR:		z = -1.0f; trianglesData = true;	break;
673 		default:									TCU_THROW(InternalError, "Unacceptable stage");
674 	}
675 
676 	if (trianglesData)
677 	{
678 		geometryData.reserve(6);
679 
680 		geometryData.push_back(tcu::Vec3(-1.0f, -1.0f, z));
681 		geometryData.push_back(tcu::Vec3(-1.0f, +1.0f, z));
682 		geometryData.push_back(tcu::Vec3(+1.0f, -1.0f, z));
683 		geometryData.push_back(tcu::Vec3(+1.0f, -1.0f, z));
684 		geometryData.push_back(tcu::Vec3(-1.0f, +1.0f, z));
685 		geometryData.push_back(tcu::Vec3(+1.0f, +1.0f, z));
686 	}
687 	else
688 	{
689 		geometryData.reserve(2);
690 
691 		geometryData.push_back(tcu::Vec3(-1.0f, -1.0f, z));
692 		geometryData.push_back(tcu::Vec3(+1.0f, +1.0f, z));
693 	}
694 
695 	setGeometryCount(1u);
696 
697 	addGeometry(geometryData, trianglesData, geometryFlags);
698 }
699 
setGeometryCount(const size_t geometryCount)700 void BottomLevelAccelerationStructure::setGeometryCount (const size_t geometryCount)
701 {
702 	m_geometriesData.clear();
703 
704 	m_geometriesData.reserve(geometryCount);
705 }
706 
addGeometry(de::SharedPtr<RaytracedGeometryBase> & raytracedGeometry)707 void BottomLevelAccelerationStructure::addGeometry (de::SharedPtr<RaytracedGeometryBase>&		raytracedGeometry)
708 {
709 	m_geometriesData.push_back(raytracedGeometry);
710 }
711 
addGeometry(const std::vector<tcu::Vec3> & geometryData,const bool triangles,const VkGeometryFlagsKHR geometryFlags,const VkAccelerationStructureTrianglesOpacityMicromapEXT * opacityGeometryMicromap)712 void BottomLevelAccelerationStructure::addGeometry (const std::vector<tcu::Vec3>&	geometryData,
713 													const bool						triangles,
714 													const VkGeometryFlagsKHR		geometryFlags,
715 													const VkAccelerationStructureTrianglesOpacityMicromapEXT* opacityGeometryMicromap)
716 {
717 	DE_ASSERT(geometryData.size() > 0);
718 	DE_ASSERT((triangles && geometryData.size() % 3 == 0) || (!triangles && geometryData.size() % 2 == 0));
719 
720 	if (!triangles)
721 		for (size_t posNdx = 0; posNdx < geometryData.size() / 2; ++posNdx)
722 		{
723 			DE_ASSERT(geometryData[2 * posNdx].x() <= geometryData[2 * posNdx + 1].x());
724 			DE_ASSERT(geometryData[2 * posNdx].y() <= geometryData[2 * posNdx + 1].y());
725 			DE_ASSERT(geometryData[2 * posNdx].z() <= geometryData[2 * posNdx + 1].z());
726 		}
727 
728 	de::SharedPtr<RaytracedGeometryBase> geometry = makeRaytracedGeometry(triangles ? VK_GEOMETRY_TYPE_TRIANGLES_KHR : VK_GEOMETRY_TYPE_AABBS_KHR, VK_FORMAT_R32G32B32_SFLOAT, VK_INDEX_TYPE_NONE_KHR);
729 	for (auto it = begin(geometryData), eit = end(geometryData); it != eit; ++it)
730 		geometry->addVertex(*it);
731 
732 	geometry->setGeometryFlags(geometryFlags);
733 	if (opacityGeometryMicromap)
734 		geometry->setOpacityMicromap(opacityGeometryMicromap);
735 	addGeometry(geometry);
736 }
737 
getStructureBuildSizes() const738 VkAccelerationStructureBuildSizesInfoKHR BottomLevelAccelerationStructure::getStructureBuildSizes () const
739 {
740 	return
741 	{
742 		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,	//  VkStructureType	sType;
743 		DE_NULL,														//  const void*		pNext;
744 		m_structureSize,												//  VkDeviceSize	accelerationStructureSize;
745 		m_updateScratchSize,											//  VkDeviceSize	updateScratchSize;
746 		m_buildScratchSize												//  VkDeviceSize	buildScratchSize;
747 	};
748 };
749 
getVertexBufferSize(const std::vector<de::SharedPtr<RaytracedGeometryBase>> & geometriesData)750 VkDeviceSize getVertexBufferSize (const std::vector<de::SharedPtr<RaytracedGeometryBase>>&	geometriesData)
751 {
752 	DE_ASSERT(geometriesData.size() != 0);
753 	VkDeviceSize					bufferSizeBytes = 0;
754 	for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
755 		bufferSizeBytes += deAlignSize(geometriesData[geometryNdx]->getVertexByteSize(),8);
756 	return bufferSizeBytes;
757 }
758 
createVertexBuffer(const DeviceInterface & vk,const VkDevice device,Allocator & allocator,const VkDeviceSize bufferSizeBytes)759 BufferWithMemory* createVertexBuffer (const DeviceInterface&	vk,
760 									  const VkDevice			device,
761 									  Allocator&				allocator,
762 									  const VkDeviceSize		bufferSizeBytes)
763 {
764 	const VkBufferCreateInfo	bufferCreateInfo	= makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
765 	return new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
766 }
767 
createVertexBuffer(const DeviceInterface & vk,const VkDevice device,Allocator & allocator,const std::vector<de::SharedPtr<RaytracedGeometryBase>> & geometriesData)768 BufferWithMemory* createVertexBuffer (const DeviceInterface&									vk,
769 									  const VkDevice											device,
770 									  Allocator&												allocator,
771 									  const std::vector<de::SharedPtr<RaytracedGeometryBase>>&	geometriesData)
772 {
773 	return createVertexBuffer(vk, device, allocator, getVertexBufferSize(geometriesData));
774 }
775 
updateVertexBuffer(const DeviceInterface & vk,const VkDevice device,const std::vector<de::SharedPtr<RaytracedGeometryBase>> & geometriesData,BufferWithMemory * vertexBuffer,VkDeviceSize geometriesOffset=0)776 void updateVertexBuffer (const DeviceInterface&										vk,
777 						 const VkDevice												device,
778 						 const std::vector<de::SharedPtr<RaytracedGeometryBase>>&	geometriesData,
779 						 BufferWithMemory*											vertexBuffer,
780 						 VkDeviceSize												geometriesOffset = 0)
781 {
782 	const Allocation&				geometryAlloc		= vertexBuffer->getAllocation();
783 	deUint8*						bufferStart			= static_cast<deUint8*>(geometryAlloc.getHostPtr());
784 	VkDeviceSize					bufferOffset		= geometriesOffset;
785 
786 	for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
787 	{
788 		const void*					geometryPtr			= geometriesData[geometryNdx]->getVertexPointer();
789 		const size_t				geometryPtrSize		= geometriesData[geometryNdx]->getVertexByteSize();
790 
791 		deMemcpy(&bufferStart[bufferOffset], geometryPtr, geometryPtrSize);
792 
793 		bufferOffset += deAlignSize(geometryPtrSize,8);
794 	}
795 
796 	// Flush the whole allocation. We could flush only the interesting range, but we'd need to be sure both the offset and size
797 	// align to VkPhysicalDeviceLimits::nonCoherentAtomSize, which we are not considering. Also note most code uses Coherent memory
798 	// for the vertex and index buffers, so flushing is actually not needed.
799 	flushAlloc(vk, device, geometryAlloc);
800 }
801 
getIndexBufferSize(const std::vector<de::SharedPtr<RaytracedGeometryBase>> & geometriesData)802 VkDeviceSize getIndexBufferSize (const std::vector<de::SharedPtr<RaytracedGeometryBase>>&	geometriesData)
803 {
804 	DE_ASSERT(!geometriesData.empty());
805 
806 	VkDeviceSize	bufferSizeBytes = 0;
807 	for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
808 		if(geometriesData[geometryNdx]->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
809 			bufferSizeBytes += deAlignSize(geometriesData[geometryNdx]->getIndexByteSize(),8);
810 	return bufferSizeBytes;
811 }
812 
createIndexBuffer(const DeviceInterface & vk,const VkDevice device,Allocator & allocator,const VkDeviceSize bufferSizeBytes)813 BufferWithMemory* createIndexBuffer (const DeviceInterface&		vk,
814 									 const VkDevice				device,
815 									 Allocator&					allocator,
816 									 const VkDeviceSize			bufferSizeBytes)
817 {
818 	DE_ASSERT(bufferSizeBytes);
819 	const VkBufferCreateInfo		bufferCreateInfo	= makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
820 	return  new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
821 }
822 
createIndexBuffer(const DeviceInterface & vk,const VkDevice device,Allocator & allocator,const std::vector<de::SharedPtr<RaytracedGeometryBase>> & geometriesData)823 BufferWithMemory* createIndexBuffer (const DeviceInterface&										vk,
824 									 const VkDevice												device,
825 									 Allocator&													allocator,
826 									 const std::vector<de::SharedPtr<RaytracedGeometryBase>>&	geometriesData)
827 {
828 
829 
830 	const VkDeviceSize bufferSizeBytes = getIndexBufferSize(geometriesData);
831 	return bufferSizeBytes ? createIndexBuffer(vk, device, allocator, bufferSizeBytes) : nullptr;
832 }
833 
updateIndexBuffer(const DeviceInterface & vk,const VkDevice device,const std::vector<de::SharedPtr<RaytracedGeometryBase>> & geometriesData,BufferWithMemory * indexBuffer,VkDeviceSize geometriesOffset)834 void updateIndexBuffer (const DeviceInterface&										vk,
835 						const VkDevice												device,
836 						const std::vector<de::SharedPtr<RaytracedGeometryBase>>&	geometriesData,
837 						BufferWithMemory*											indexBuffer,
838 						VkDeviceSize												geometriesOffset)
839 {
840 	const Allocation&				indexAlloc			= indexBuffer->getAllocation();
841 	deUint8*						bufferStart			= static_cast<deUint8*>(indexAlloc.getHostPtr());
842 	VkDeviceSize					bufferOffset		= geometriesOffset;
843 
844 	for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
845 	{
846 		if (geometriesData[geometryNdx]->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
847 		{
848 			const void*					indexPtr		= geometriesData[geometryNdx]->getIndexPointer();
849 			const size_t				indexPtrSize	= geometriesData[geometryNdx]->getIndexByteSize();
850 
851 			deMemcpy(&bufferStart[bufferOffset], indexPtr, indexPtrSize);
852 
853 			bufferOffset += deAlignSize(indexPtrSize, 8);
854 		}
855 	}
856 
857 	// Flush the whole allocation. We could flush only the interesting range, but we'd need to be sure both the offset and size
858 	// align to VkPhysicalDeviceLimits::nonCoherentAtomSize, which we are not considering. Also note most code uses Coherent memory
859 	// for the vertex and index buffers, so flushing is actually not needed.
860 	flushAlloc(vk, device, indexAlloc);
861 }
862 
// Concrete VK_KHR_acceleration_structure implementation of the abstract
// BottomLevelAccelerationStructure interface. Owns the backing buffer, the
// vertex/index input buffers, a device or host scratch buffer, and the
// VkAccelerationStructureKHR handle itself. Non-copyable.
class BottomLevelAccelerationStructureKHR : public BottomLevelAccelerationStructure
{
public:
	// Number of device-memory allocations an instance can make (see definition).
	static deUint32											getRequiredAllocationCount						(void);

															BottomLevelAccelerationStructureKHR				();
															BottomLevelAccelerationStructureKHR				(const BottomLevelAccelerationStructureKHR&		other) = delete;
	virtual													~BottomLevelAccelerationStructureKHR			();

	// Configuration setters; must be called before create()/build() to take effect.
	void													setBuildType									(const VkAccelerationStructureBuildTypeKHR		buildType) override;
	VkAccelerationStructureBuildTypeKHR						getBuildType									() const override;
	void													setCreateFlags									(const VkAccelerationStructureCreateFlagsKHR	createFlags) override;
	void													setCreateGeneric								(bool											createGeneric) override;
	void													setBuildFlags									(const VkBuildAccelerationStructureFlagsKHR		buildFlags) override;
	void													setBuildWithoutGeometries						(bool											buildWithoutGeometries) override;
	void													setBuildWithoutPrimitives						(bool											buildWithoutPrimitives) override;
	void													setDeferredOperation							(const bool										deferredOperation,
																											 const deUint32									workerThreadCount) override;
	void													setUseArrayOfPointers							(const bool										useArrayOfPointers) override;
	void													setIndirectBuildParameters						(const VkBuffer									indirectBuffer,
																											 const VkDeviceSize								indirectBufferOffset,
																											 const deUint32									indirectBufferStride) override;
	VkBuildAccelerationStructureFlagsKHR					getBuildFlags									() const override;

	// Lifetime / operations: create the AS object (and its buffers), build it,
	// copy/serialize/deserialize it via command buffer or host commands.
	void													create											(const DeviceInterface&							vk,
																											 const VkDevice									device,
																											 Allocator&										allocator,
																											 VkDeviceSize									structureSize,
																											 VkDeviceAddress								deviceAddress			= 0u,
																											 const void*									pNext					= DE_NULL,
																											 const MemoryRequirement&						addMemoryRequirement	= MemoryRequirement::Any) override;
	void													build											(const DeviceInterface&							vk,
																											 const VkDevice									device,
																											 const VkCommandBuffer							cmdBuffer) override;
	void													copyFrom										(const DeviceInterface&							vk,
																											 const VkDevice									device,
																											 const VkCommandBuffer							cmdBuffer,
																											 BottomLevelAccelerationStructure*				accelerationStructure,
																											 bool											compactCopy) override;

	void													serialize										(const DeviceInterface&							vk,
																											 const VkDevice									device,
																											 const VkCommandBuffer							cmdBuffer,
																											 SerialStorage*									storage) override;
	void													deserialize										(const DeviceInterface&							vk,
																											 const VkDevice									device,
																											 const VkCommandBuffer							cmdBuffer,
																											 SerialStorage*									storage) override;

	// Pointer to the owned VkAccelerationStructureKHR handle.
	const VkAccelerationStructureKHR*						getPtr											(void) const override;

protected:
	VkAccelerationStructureBuildTypeKHR						m_buildType;
	VkAccelerationStructureCreateFlagsKHR					m_createFlags;
	bool													m_createGeneric;
	VkBuildAccelerationStructureFlagsKHR					m_buildFlags;
	bool													m_buildWithoutGeometries;
	bool													m_buildWithoutPrimitives;
	bool													m_deferredOperation;
	deUint32												m_workerThreadCount;
	bool													m_useArrayOfPointers;
	de::MovePtr<BufferWithMemory>							m_accelerationStructureBuffer;
	de::MovePtr<BufferWithMemory>							m_vertexBuffer;
	de::MovePtr<BufferWithMemory>							m_indexBuffer;
	de::MovePtr<BufferWithMemory>							m_deviceScratchBuffer;
	de::UniquePtr<std::vector<deUint8>>						m_hostScratchBuffer;
	Move<VkAccelerationStructureKHR>						m_accelerationStructureKHR;
	VkBuffer												m_indirectBuffer;
	VkDeviceSize											m_indirectBufferOffset;
	deUint32												m_indirectBufferStride;

	// Fills the Vulkan geometry/range/count arrays consumed by size queries and builds.
	void													prepareGeometries								(const DeviceInterface&												vk,
																											 const VkDevice														device,
																											 std::vector<VkAccelerationStructureGeometryKHR>&					accelerationStructureGeometriesKHR,
																											 std::vector<VkAccelerationStructureGeometryKHR*>&					accelerationStructureGeometriesKHRPointers,
																											 std::vector<VkAccelerationStructureBuildRangeInfoKHR>&				accelerationStructureBuildRangeInfoKHR,
																											 std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT>&	accelerationStructureGeometryMicromapsEXT,
																											 std::vector<deUint32>&												maxPrimitiveCounts,
																											 VkDeviceSize														vertexBufferOffset = 0,
																											 VkDeviceSize														indexBufferOffset = 0) const;

	// Buffer/offset hooks; subclasses may override to place data in shared buffers.
	virtual BufferWithMemory*								getAccelerationStructureBuffer					() const { return m_accelerationStructureBuffer.get(); }
	virtual BufferWithMemory*								getDeviceScratchBuffer							() const { return m_deviceScratchBuffer.get(); }
	virtual std::vector<deUint8>*							getHostScratchBuffer							() const { return m_hostScratchBuffer.get(); }
	virtual BufferWithMemory*								getVertexBuffer									() const { return m_vertexBuffer.get(); }
	virtual BufferWithMemory*								getIndexBuffer									() const { return m_indexBuffer.get(); }

	virtual VkDeviceSize									getAccelerationStructureBufferOffset			() const { return 0; }
	virtual VkDeviceSize									getDeviceScratchBufferOffset					() const { return 0; }
	virtual VkDeviceSize									getVertexBufferOffset							() const { return 0; }
	virtual VkDeviceSize									getIndexBufferOffset							() const { return 0; }
};
955 
getRequiredAllocationCount(void)956 deUint32 BottomLevelAccelerationStructureKHR::getRequiredAllocationCount (void)
957 {
958 	/*
959 		de::MovePtr<BufferWithMemory>							m_geometryBuffer; // but only when m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR
960 		de::MovePtr<Allocation>									m_accelerationStructureAlloc;
961 		de::MovePtr<BufferWithMemory>							m_deviceScratchBuffer;
962 	*/
963 	return 3u;
964 }
965 
// Empty destructor; owned resources (Move/MovePtr members) clean up themselves.
BottomLevelAccelerationStructureKHR::~BottomLevelAccelerationStructureKHR ()
{
}
969 
// Defaults: device-side build, no special create/build flags, immediate
// (non-deferred) host builds, and no buffers allocated yet — create() fills
// those in. The host scratch vector exists from the start but is empty.
BottomLevelAccelerationStructureKHR::BottomLevelAccelerationStructureKHR ()
	: BottomLevelAccelerationStructure	()
	, m_buildType						(VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	, m_createFlags						(0u)
	, m_createGeneric					(false)
	, m_buildFlags						(0u)
	, m_buildWithoutGeometries			(false)
	, m_buildWithoutPrimitives			(false)
	, m_deferredOperation				(false)
	, m_workerThreadCount				(0)
	, m_useArrayOfPointers				(false)
	, m_accelerationStructureBuffer		(DE_NULL)
	, m_vertexBuffer					(DE_NULL)
	, m_indexBuffer						(DE_NULL)
	, m_deviceScratchBuffer				(DE_NULL)
	, m_hostScratchBuffer				(new std::vector<deUint8>)
	, m_accelerationStructureKHR		()
	, m_indirectBuffer					(DE_NULL)
	, m_indirectBufferOffset			(0)
	, m_indirectBufferStride			(0)
{
}
992 
setBuildType(const VkAccelerationStructureBuildTypeKHR buildType)993 void BottomLevelAccelerationStructureKHR::setBuildType (const VkAccelerationStructureBuildTypeKHR	buildType)
994 {
995 	m_buildType = buildType;
996 }
997 
getBuildType() const998 VkAccelerationStructureBuildTypeKHR BottomLevelAccelerationStructureKHR::getBuildType () const
999 {
1000 	return m_buildType;
1001 }
1002 
setCreateFlags(const VkAccelerationStructureCreateFlagsKHR createFlags)1003 void BottomLevelAccelerationStructureKHR::setCreateFlags (const VkAccelerationStructureCreateFlagsKHR	createFlags)
1004 {
1005 	m_createFlags = createFlags;
1006 }
1007 
setCreateGeneric(bool createGeneric)1008 void BottomLevelAccelerationStructureKHR::setCreateGeneric (bool createGeneric)
1009 {
1010 	m_createGeneric = createGeneric;
1011 }
1012 
setBuildFlags(const VkBuildAccelerationStructureFlagsKHR buildFlags)1013 void BottomLevelAccelerationStructureKHR::setBuildFlags (const VkBuildAccelerationStructureFlagsKHR	buildFlags)
1014 {
1015 	m_buildFlags = buildFlags;
1016 }
1017 
setBuildWithoutGeometries(bool buildWithoutGeometries)1018 void BottomLevelAccelerationStructureKHR::setBuildWithoutGeometries (bool buildWithoutGeometries)
1019 {
1020 	m_buildWithoutGeometries = buildWithoutGeometries;
1021 }
1022 
setBuildWithoutPrimitives(bool buildWithoutPrimitives)1023 void BottomLevelAccelerationStructureKHR::setBuildWithoutPrimitives (bool buildWithoutPrimitives)
1024 {
1025 	m_buildWithoutPrimitives = buildWithoutPrimitives;
1026 }
1027 
setDeferredOperation(const bool deferredOperation,const deUint32 workerThreadCount)1028 void BottomLevelAccelerationStructureKHR::setDeferredOperation (const bool		deferredOperation,
1029 																const deUint32	workerThreadCount)
1030 {
1031 	m_deferredOperation = deferredOperation;
1032 	m_workerThreadCount = workerThreadCount;
1033 }
1034 
setUseArrayOfPointers(const bool useArrayOfPointers)1035 void BottomLevelAccelerationStructureKHR::setUseArrayOfPointers (const bool	useArrayOfPointers)
1036 {
1037 	m_useArrayOfPointers = useArrayOfPointers;
1038 }
1039 
setIndirectBuildParameters(const VkBuffer indirectBuffer,const VkDeviceSize indirectBufferOffset,const deUint32 indirectBufferStride)1040 void BottomLevelAccelerationStructureKHR::setIndirectBuildParameters (const VkBuffer		indirectBuffer,
1041 																	  const VkDeviceSize	indirectBufferOffset,
1042 																	  const deUint32		indirectBufferStride)
1043 {
1044 	m_indirectBuffer		= indirectBuffer;
1045 	m_indirectBufferOffset	= indirectBufferOffset;
1046 	m_indirectBufferStride	= indirectBufferStride;
1047 }
1048 
getBuildFlags() const1049 VkBuildAccelerationStructureFlagsKHR BottomLevelAccelerationStructureKHR::getBuildFlags () const
1050 {
1051 	return m_buildFlags;
1052 }
1053 
// Creates the VkAccelerationStructureKHR object and every buffer it needs.
// Either geometries were added (structureSize == 0, sizes are queried from the
// implementation) or an explicit structureSize is given (copy/compact/
// deserialize targets, which need no geometries).
void BottomLevelAccelerationStructureKHR::create (const DeviceInterface&				vk,
												  const VkDevice						device,
												  Allocator&							allocator,
												  VkDeviceSize							structureSize,
												  VkDeviceAddress						deviceAddress,
												  const void*							pNext,
												  const MemoryRequirement&				addMemoryRequirement)
{
	// AS may be built from geometries using vkCmdBuildAccelerationStructuresKHR / vkBuildAccelerationStructuresKHR
	// or may be copied/compacted/deserialized from other AS ( in this case AS does not need geometries, but it needs to know its size before creation ).
	DE_ASSERT(!m_geometriesData.empty() !=  !(structureSize == 0)); // logical xor

	if (structureSize == 0)
	{
		// Ask the implementation how large the structure and scratch buffers
		// must be for the registered geometries.
		std::vector<VkAccelerationStructureGeometryKHR>			accelerationStructureGeometriesKHR;
		std::vector<VkAccelerationStructureGeometryKHR*>		accelerationStructureGeometriesKHRPointers;
		std::vector<VkAccelerationStructureBuildRangeInfoKHR>	accelerationStructureBuildRangeInfoKHR;
		std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT> accelerationStructureGeometryMicromapsEXT;
		std::vector<deUint32>									maxPrimitiveCounts;
		prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers, accelerationStructureBuildRangeInfoKHR, accelerationStructureGeometryMicromapsEXT, maxPrimitiveCounts);

		const VkAccelerationStructureGeometryKHR*				accelerationStructureGeometriesKHRPointer	= accelerationStructureGeometriesKHR.data();
		const VkAccelerationStructureGeometryKHR* const*		accelerationStructureGeometry				= accelerationStructureGeometriesKHRPointers.data();

		VkAccelerationStructureBuildGeometryInfoKHR	accelerationStructureBuildGeometryInfoKHR	=
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,			//  VkStructureType										sType;
			DE_NULL,																	//  const void*											pNext;
			VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,							//  VkAccelerationStructureTypeKHR						type;
			m_buildFlags,																//  VkBuildAccelerationStructureFlagsKHR				flags;
			VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,								//  VkBuildAccelerationStructureModeKHR					mode;
			DE_NULL,																	//  VkAccelerationStructureKHR							srcAccelerationStructure;
			DE_NULL,																	//  VkAccelerationStructureKHR							dstAccelerationStructure;
			static_cast<deUint32>(accelerationStructureGeometriesKHR.size()),			//  deUint32											geometryCount;
			m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer,	//  const VkAccelerationStructureGeometryKHR*			pGeometries;
			m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL,				//  const VkAccelerationStructureGeometryKHR* const*	ppGeometries;
			makeDeviceOrHostAddressKHR(DE_NULL)											//  VkDeviceOrHostAddressKHR							scratchData;
		};
		VkAccelerationStructureBuildSizesInfoKHR sizeInfo =
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,	//  VkStructureType	sType;
			DE_NULL,														//  const void*		pNext;
			0,																//  VkDeviceSize	accelerationStructureSize;
			0,																//  VkDeviceSize	updateScratchSize;
			0																//  VkDeviceSize	buildScratchSize;
		};

		vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);

		m_structureSize		= sizeInfo.accelerationStructureSize;
		m_updateScratchSize	= sizeInfo.updateScratchSize;
		m_buildScratchSize	= sizeInfo.buildScratchSize;
	}
	else
	{
		// Size supplied by the caller; no scratch buffers needed (no build will run).
		m_structureSize		= structureSize;
		m_updateScratchSize	= 0u;
		m_buildScratchSize	= 0u;
	}

	{
		// Backing buffer for the acceleration structure. Prefer cached memory,
		// but fall back to the plain requirement set if that combination is
		// unsupported on this device.
		const VkBufferCreateInfo		bufferCreateInfo		= makeBufferCreateInfo(m_structureSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
		const MemoryRequirement			memoryRequirement		= addMemoryRequirement | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress;

		try
		{
			m_accelerationStructureBuffer	= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | memoryRequirement));
		}
		catch (const tcu::NotSupportedError&)
		{
			// retry without Cached flag
			m_accelerationStructureBuffer	= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, memoryRequirement));
		}
	}

	{
		// Create the acceleration structure object in the backing buffer.
		const VkAccelerationStructureTypeKHR		structureType						= (m_createGeneric
																						   ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
																						   : VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
		const VkAccelerationStructureCreateInfoKHR	accelerationStructureCreateInfoKHR
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,						//  VkStructureType											sType;
			pNext,																			//  const void*												pNext;
			m_createFlags,																	//  VkAccelerationStructureCreateFlagsKHR					createFlags;
			getAccelerationStructureBuffer()->get(),										//  VkBuffer												buffer;
			getAccelerationStructureBufferOffset(),											//  VkDeviceSize											offset;
			m_structureSize,																//  VkDeviceSize											size;
			structureType,																	//  VkAccelerationStructureTypeKHR							type;
			deviceAddress																	//  VkDeviceAddress											deviceAddress;
		};

		m_accelerationStructureKHR	= createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);
	}

	// Scratch space: a device buffer for device builds, a host vector otherwise.
	if (m_buildScratchSize > 0u)
	{
		if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
		{
			const VkBufferCreateInfo		bufferCreateInfo = makeBufferCreateInfo(m_buildScratchSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
			m_deviceScratchBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
		}
		else
		{
			m_hostScratchBuffer->resize(static_cast<size_t>(m_buildScratchSize));
		}
	}

	// Device builds also need vertex/index input buffers for the geometries.
	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR && !m_geometriesData.empty())
	{
		m_vertexBuffer	= de::MovePtr<BufferWithMemory>(createVertexBuffer(vk, device, allocator, m_geometriesData));
		m_indexBuffer	= de::MovePtr<BufferWithMemory>(createIndexBuffer(vk, device, allocator, m_geometriesData));
	}
}
1167 
build(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer)1168 void BottomLevelAccelerationStructureKHR::build (const DeviceInterface&						vk,
1169 												 const VkDevice								device,
1170 												 const VkCommandBuffer						cmdBuffer)
1171 {
1172 	DE_ASSERT(!m_geometriesData.empty());
1173 	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1174 	DE_ASSERT(m_buildScratchSize != 0);
1175 
1176 	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1177 	{
1178 		updateVertexBuffer(vk, device, m_geometriesData,  getVertexBuffer(), getVertexBufferOffset());
1179 		if(getIndexBuffer() != DE_NULL)
1180 			updateIndexBuffer(vk, device, m_geometriesData, getIndexBuffer(), getIndexBufferOffset());
1181 	}
1182 
1183 	{
1184 		std::vector<VkAccelerationStructureGeometryKHR>			accelerationStructureGeometriesKHR;
1185 		std::vector<VkAccelerationStructureGeometryKHR*>		accelerationStructureGeometriesKHRPointers;
1186 		std::vector<VkAccelerationStructureBuildRangeInfoKHR>	accelerationStructureBuildRangeInfoKHR;
1187 		std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT> accelerationStructureGeometryMicromapsEXT;
1188 		std::vector<deUint32>									maxPrimitiveCounts;
1189 
1190 		prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers,
1191 						  accelerationStructureBuildRangeInfoKHR, accelerationStructureGeometryMicromapsEXT, maxPrimitiveCounts, getVertexBufferOffset(), getIndexBufferOffset());
1192 
1193 		const VkAccelerationStructureGeometryKHR*			accelerationStructureGeometriesKHRPointer	= accelerationStructureGeometriesKHR.data();
1194 		const VkAccelerationStructureGeometryKHR* const*	accelerationStructureGeometry				= accelerationStructureGeometriesKHRPointers.data();
1195 		VkDeviceOrHostAddressKHR							scratchData									= (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1196 																										? makeDeviceOrHostAddressKHR(vk, device, getDeviceScratchBuffer()->get(), getDeviceScratchBufferOffset())
1197 																										: makeDeviceOrHostAddressKHR(getHostScratchBuffer()->data());
1198 		const deUint32										geometryCount								= (m_buildWithoutGeometries
1199 																										? 0u
1200 																										: static_cast<deUint32>(accelerationStructureGeometriesKHR.size()));
1201 
1202 		VkAccelerationStructureBuildGeometryInfoKHR	accelerationStructureBuildGeometryInfoKHR	=
1203 		{
1204 			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,			//  VkStructureType										sType;
1205 			DE_NULL,																	//  const void*											pNext;
1206 			VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,							//  VkAccelerationStructureTypeKHR						type;
1207 			m_buildFlags,																//  VkBuildAccelerationStructureFlagsKHR				flags;
1208 			VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,								//  VkBuildAccelerationStructureModeKHR					mode;
1209 			DE_NULL,																	//  VkAccelerationStructureKHR							srcAccelerationStructure;
1210 			m_accelerationStructureKHR.get(),											//  VkAccelerationStructureKHR							dstAccelerationStructure;
1211 			geometryCount,																//  deUint32											geometryCount;
1212 			m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer,	//  const VkAccelerationStructureGeometryKHR*			pGeometries;
1213 			m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL,				//  const VkAccelerationStructureGeometryKHR* const*	ppGeometries;
1214 			scratchData																	//  VkDeviceOrHostAddressKHR							scratchData;
1215 		};
1216 
1217 		VkAccelerationStructureBuildRangeInfoKHR* accelerationStructureBuildRangeInfoKHRPtr	= accelerationStructureBuildRangeInfoKHR.data();
1218 
1219 		if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1220 		{
1221 			if (m_indirectBuffer == DE_NULL)
1222 				vk.cmdBuildAccelerationStructuresKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
1223 			else
1224 			{
1225 				VkDeviceAddress	indirectDeviceAddress	= getBufferDeviceAddress(vk, device, m_indirectBuffer, m_indirectBufferOffset);
1226 				deUint32*		pMaxPrimitiveCounts		= maxPrimitiveCounts.data();
1227 				vk.cmdBuildAccelerationStructuresIndirectKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, &indirectDeviceAddress, &m_indirectBufferStride, &pMaxPrimitiveCounts);
1228 			}
1229 		}
1230 		else if (!m_deferredOperation)
1231 		{
1232 			VK_CHECK(vk.buildAccelerationStructuresKHR(device, DE_NULL, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr));
1233 		}
1234 		else
1235 		{
1236 			const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
1237 			const auto deferredOperation	= deferredOperationPtr.get();
1238 
1239 			VkResult result = vk.buildAccelerationStructuresKHR(device, deferredOperation, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
1240 
1241 			DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1242 
1243 			finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1244 		}
1245 	}
1246 
1247 	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1248 	{
1249 		const VkAccessFlags		accessMasks	= VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
1250 		const VkMemoryBarrier	memBarrier	= makeMemoryBarrier(accessMasks, accessMasks);
1251 
1252 		cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
1253 	}
1254 }
1255 
copyFrom(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,BottomLevelAccelerationStructure * accelerationStructure,bool compactCopy)1256 void BottomLevelAccelerationStructureKHR::copyFrom (const DeviceInterface&						vk,
1257 													const VkDevice								device,
1258 													const VkCommandBuffer						cmdBuffer,
1259 													BottomLevelAccelerationStructure*			accelerationStructure,
1260 													bool										compactCopy)
1261 {
1262 	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1263 	DE_ASSERT(accelerationStructure != DE_NULL);
1264 
1265 	VkCopyAccelerationStructureInfoKHR copyAccelerationStructureInfo =
1266 	{
1267 		VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR,															// VkStructureType						sType;
1268 		DE_NULL,																										// const void*							pNext;
1269 		*(accelerationStructure->getPtr()),																				// VkAccelerationStructureKHR			src;
1270 		*(getPtr()),																									// VkAccelerationStructureKHR			dst;
1271 		compactCopy ? VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR : VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR	// VkCopyAccelerationStructureModeKHR	mode;
1272 	};
1273 
1274 	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1275 	{
1276 		vk.cmdCopyAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
1277 	}
1278 	else if (!m_deferredOperation)
1279 	{
1280 		VK_CHECK(vk.copyAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
1281 	}
1282 	else
1283 	{
1284 		const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
1285 		const auto deferredOperation	= deferredOperationPtr.get();
1286 
1287 		VkResult result = vk.copyAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);
1288 
1289 		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1290 
1291 		finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1292 	}
1293 
1294 	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1295 	{
1296 		const VkAccessFlags		accessMasks	= VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
1297 		const VkMemoryBarrier	memBarrier	= makeMemoryBarrier(accessMasks, accessMasks);
1298 
1299 		cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
1300 	}
1301 }
1302 
serialize(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,SerialStorage * storage)1303 void BottomLevelAccelerationStructureKHR::serialize (const DeviceInterface&		vk,
1304 													 const VkDevice				device,
1305 													 const VkCommandBuffer		cmdBuffer,
1306 													 SerialStorage*				storage)
1307 {
1308 	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1309 	DE_ASSERT(storage != DE_NULL);
1310 
1311 	const VkCopyAccelerationStructureToMemoryInfoKHR	copyAccelerationStructureInfo	=
1312 	{
1313 		VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_INFO_KHR,	// VkStructureType						sType;
1314 		DE_NULL,															// const void*							pNext;
1315 		*(getPtr()),														// VkAccelerationStructureKHR			src;
1316 		storage->getAddress(vk, device, m_buildType),						// VkDeviceOrHostAddressKHR				dst;
1317 		VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR					// VkCopyAccelerationStructureModeKHR	mode;
1318 	};
1319 
1320 	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1321 	{
1322 		vk.cmdCopyAccelerationStructureToMemoryKHR(cmdBuffer, &copyAccelerationStructureInfo);
1323 	}
1324 	else if (!m_deferredOperation)
1325 	{
1326 		VK_CHECK(vk.copyAccelerationStructureToMemoryKHR(device, DE_NULL, &copyAccelerationStructureInfo));
1327 	}
1328 	else
1329 	{
1330 		const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
1331 		const auto deferredOperation	= deferredOperationPtr.get();
1332 
1333 		const VkResult result = vk.copyAccelerationStructureToMemoryKHR(device, deferredOperation, &copyAccelerationStructureInfo);
1334 
1335 		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1336 
1337 		finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1338 	}
1339 }
1340 
deserialize(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,SerialStorage * storage)1341 void BottomLevelAccelerationStructureKHR::deserialize (const DeviceInterface&	vk,
1342 													   const VkDevice			device,
1343 													   const VkCommandBuffer	cmdBuffer,
1344 													   SerialStorage*			storage)
1345 {
1346 	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1347 	DE_ASSERT(storage != DE_NULL);
1348 
1349 	const VkCopyMemoryToAccelerationStructureInfoKHR	copyAccelerationStructureInfo	=
1350 	{
1351 		VK_STRUCTURE_TYPE_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_INFO_KHR,	// VkStructureType							sType;
1352 		DE_NULL,															// const void*								pNext;
1353 		storage->getAddressConst(vk, device, m_buildType),					// VkDeviceOrHostAddressConstKHR			src;
1354 		*(getPtr()),														// VkAccelerationStructureKHR				dst;
1355 		VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR					// VkCopyAccelerationStructureModeKHR		mode;
1356 	};
1357 
1358 	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1359 	{
1360 		vk.cmdCopyMemoryToAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
1361 	}
1362 	else if (!m_deferredOperation)
1363 	{
1364 		VK_CHECK(vk.copyMemoryToAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
1365 	}
1366 	else
1367 	{
1368 		const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
1369 		const auto deferredOperation	= deferredOperationPtr.get();
1370 
1371 		const VkResult result = vk.copyMemoryToAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);
1372 
1373 		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1374 
1375 		finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1376 	}
1377 
1378 	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1379 	{
1380 		const VkAccessFlags		accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
1381 		const VkMemoryBarrier	memBarrier = makeMemoryBarrier(accessMasks, accessMasks);
1382 
1383 		cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
1384 	}
1385 }
1386 
// Gives read-only access to the underlying VkAccelerationStructureKHR handle,
// e.g. so other code can reference this BLAS by handle.
const VkAccelerationStructureKHR* BottomLevelAccelerationStructureKHR::getPtr (void) const
{
	return &m_accelerationStructureKHR.get();
}
1391 
// Translates m_geometriesData into the per-geometry arrays consumed by the KHR build
// commands: one VkAccelerationStructureGeometryKHR (plus a pointer to it), one build
// range info and one maximum primitive count per geometry.
// vertexBufferOffset/indexBufferOffset are taken by value and advanced locally as the
// geometries are laid out back to back in the shared vertex/index buffers.
void BottomLevelAccelerationStructureKHR::prepareGeometries (const DeviceInterface&												vk,
															 const VkDevice														device,
															 std::vector<VkAccelerationStructureGeometryKHR>&					accelerationStructureGeometriesKHR,
															 std::vector<VkAccelerationStructureGeometryKHR*>&					accelerationStructureGeometriesKHRPointers,
															 std::vector<VkAccelerationStructureBuildRangeInfoKHR>&				accelerationStructureBuildRangeInfoKHR,
															 std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT>&	accelerationStructureGeometryMicromapsEXT,
															 std::vector<deUint32>&												maxPrimitiveCounts,
															 VkDeviceSize														vertexBufferOffset,
															 VkDeviceSize														indexBufferOffset) const
{
	// One output entry per geometry.
	accelerationStructureGeometriesKHR.resize(m_geometriesData.size());
	accelerationStructureGeometriesKHRPointers.resize(m_geometriesData.size());
	accelerationStructureBuildRangeInfoKHR.resize(m_geometriesData.size());
	accelerationStructureGeometryMicromapsEXT.resize(m_geometriesData.size());
	maxPrimitiveCounts.resize(m_geometriesData.size());

	for (size_t geometryNdx = 0; geometryNdx < m_geometriesData.size(); ++geometryNdx)
	{
		const de::SharedPtr<RaytracedGeometryBase>&				geometryData = m_geometriesData[geometryNdx];
		VkDeviceOrHostAddressConstKHR							vertexData, indexData;
		if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
		{
			// Device builds address vertex/index data through buffer device addresses.
			if (getVertexBuffer() != DE_NULL)
			{
				vertexData			= makeDeviceOrHostAddressConstKHR(vk, device, getVertexBuffer()->get(), vertexBufferOffset);
				// The vertex offset only advances (8-byte aligned) when no indirect
				// buffer is in use; indirect builds keep the base offset.
				if (m_indirectBuffer == DE_NULL )
				{
					vertexBufferOffset	+= deAlignSize(geometryData->getVertexByteSize(), 8);
				}
			}
			else
				vertexData			= makeDeviceOrHostAddressConstKHR(DE_NULL);

			if (getIndexBuffer() != DE_NULL &&  geometryData->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
			{
				indexData			= makeDeviceOrHostAddressConstKHR(vk, device, getIndexBuffer()->get(), indexBufferOffset);
				indexBufferOffset	+= deAlignSize(geometryData->getIndexByteSize(), 8);
			}
			else
				indexData = makeDeviceOrHostAddressConstKHR(DE_NULL);
		}
		else
		{
			// Host builds read straight from the geometry's host-side arrays.
			vertexData = makeDeviceOrHostAddressConstKHR(geometryData->getVertexPointer());
			if (geometryData->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
				indexData = makeDeviceOrHostAddressConstKHR(geometryData->getIndexPointer());
			else
				indexData = makeDeviceOrHostAddressConstKHR(DE_NULL);
		}

		VkAccelerationStructureGeometryTrianglesDataKHR	accelerationStructureGeometryTrianglesDataKHR =
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR,	//  VkStructureType					sType;
			DE_NULL,																//  const void*						pNext;
			geometryData->getVertexFormat(),										//  VkFormat						vertexFormat;
			vertexData,																//  VkDeviceOrHostAddressConstKHR	vertexData;
			geometryData->getVertexStride(),										//  VkDeviceSize					vertexStride;
			static_cast<deUint32>(geometryData->getVertexCount()),					//  uint32_t						maxVertex;
			geometryData->getIndexType(),											//  VkIndexType						indexType;
			indexData,																//  VkDeviceOrHostAddressConstKHR	indexData;
			makeDeviceOrHostAddressConstKHR(DE_NULL),								//  VkDeviceOrHostAddressConstKHR	transformData;
		};

		// Chain an opacity micromap onto the triangle data when the geometry has one.
		if (geometryData->getHasOpacityMicromap())
			accelerationStructureGeometryTrianglesDataKHR.pNext = &geometryData->getOpacityMicromap();

		// For AABB geometry the same address is interpreted as AABB data instead.
		const VkAccelerationStructureGeometryAabbsDataKHR		accelerationStructureGeometryAabbsDataKHR =
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR,	//  VkStructureType					sType;
			DE_NULL,															//  const void*						pNext;
			vertexData,															//  VkDeviceOrHostAddressConstKHR	data;
			geometryData->getAABBStride()										//  VkDeviceSize					stride;
		};
		const VkAccelerationStructureGeometryDataKHR			geometry = (geometryData->isTrianglesType())
																		 ? makeVkAccelerationStructureGeometryDataKHR(accelerationStructureGeometryTrianglesDataKHR)
																		 : makeVkAccelerationStructureGeometryDataKHR(accelerationStructureGeometryAabbsDataKHR);
		const VkAccelerationStructureGeometryKHR				accelerationStructureGeometryKHR =
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,	//  VkStructureType							sType;
			DE_NULL,												//  const void*								pNext;
			geometryData->getGeometryType(),						//  VkGeometryTypeKHR						geometryType;
			geometry,												//  VkAccelerationStructureGeometryDataKHR	geometry;
			geometryData->getGeometryFlags()						//  VkGeometryFlagsKHR						flags;
		};

		// Zero-primitive builds get a zeroed range info, but the true primitive count
		// is still reported through maxPrimitiveCounts below.
		const deUint32 primitiveCount = (m_buildWithoutPrimitives ? 0u : geometryData->getPrimitiveCount());

		const VkAccelerationStructureBuildRangeInfoKHR			accelerationStructureBuildRangeInfosKHR =
		{
			primitiveCount,	//  deUint32	primitiveCount;
			0,				//  deUint32	primitiveOffset;
			0,				//  deUint32	firstVertex;
			0				//  deUint32	firstTransform;
		};

		accelerationStructureGeometriesKHR[geometryNdx]			= accelerationStructureGeometryKHR;
		accelerationStructureGeometriesKHRPointers[geometryNdx]	= &accelerationStructureGeometriesKHR[geometryNdx];
		accelerationStructureBuildRangeInfoKHR[geometryNdx]		= accelerationStructureBuildRangeInfosKHR;
		maxPrimitiveCounts[geometryNdx]							= geometryData->getPrimitiveCount();
	}
}
1493 
// Delegates to the KHR implementation, the only backend provided here.
deUint32 BottomLevelAccelerationStructure::getRequiredAllocationCount (void)
{
	return BottomLevelAccelerationStructureKHR::getRequiredAllocationCount();
}
1498 
// Convenience wrapper: create the structure and immediately record its build
// into cmdBuffer.
void BottomLevelAccelerationStructure::createAndBuild (const DeviceInterface&	vk,
													   const VkDevice			device,
													   const VkCommandBuffer	cmdBuffer,
													   Allocator&				allocator,
													   VkDeviceAddress			deviceAddress)
{
	// structureSize of 0 lets create() determine the size itself (presumably from
	// the geometry data — see create()).
	create(vk, device, allocator, 0u, deviceAddress);
	build(vk, device, cmdBuffer);
}
1508 
createAndCopyFrom(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,Allocator & allocator,BottomLevelAccelerationStructure * accelerationStructure,VkDeviceSize compactCopySize,VkDeviceAddress deviceAddress)1509 void BottomLevelAccelerationStructure::createAndCopyFrom (const DeviceInterface&				vk,
1510 														  const VkDevice						device,
1511 														  const VkCommandBuffer					cmdBuffer,
1512 														  Allocator&							allocator,
1513 														  BottomLevelAccelerationStructure*		accelerationStructure,
1514 														  VkDeviceSize							compactCopySize,
1515 														  VkDeviceAddress						deviceAddress)
1516 {
1517 	DE_ASSERT(accelerationStructure != NULL);
1518 	VkDeviceSize copiedSize = compactCopySize > 0u ? compactCopySize : accelerationStructure->getStructureBuildSizes().accelerationStructureSize;
1519 	DE_ASSERT(copiedSize != 0u);
1520 
1521 	create(vk, device, allocator, copiedSize, deviceAddress);
1522 	copyFrom(vk, device, cmdBuffer, accelerationStructure, compactCopySize > 0u);
1523 }
1524 
createAndDeserializeFrom(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,Allocator & allocator,SerialStorage * storage,VkDeviceAddress deviceAddress)1525 void BottomLevelAccelerationStructure::createAndDeserializeFrom (const DeviceInterface& vk,
1526 																 const VkDevice								device,
1527 																 const VkCommandBuffer						cmdBuffer,
1528 																 Allocator&									allocator,
1529 																 SerialStorage*								storage,
1530 																 VkDeviceAddress							deviceAddress )
1531 {
1532 	DE_ASSERT(storage != NULL);
1533 	DE_ASSERT(storage->getStorageSize() >= SerialStorage::SERIAL_STORAGE_SIZE_MIN);
1534 	create(vk, device, allocator, storage->getDeserializedSize(), deviceAddress);
1535 	deserialize(vk, device, cmdBuffer, storage);
1536 }
1537 
// Factory for the default (KHR extension based) BLAS implementation.
de::MovePtr<BottomLevelAccelerationStructure> makeBottomLevelAccelerationStructure ()
{
	return de::MovePtr<BottomLevelAccelerationStructure>(new BottomLevelAccelerationStructureKHR);
}
1542 
1543 // Forward declaration
1544 struct BottomLevelAccelerationStructurePoolImpl;
1545 
// A BLAS that lives inside a BottomLevelAccelerationStructurePool: instead of
// owning its buffers it indexes into the pool's shared, batched buffers through
// the m_info record filled in by the pool.
class BottomLevelAccelerationStructurePoolMember : public BottomLevelAccelerationStructureKHR
{
public:
	friend class BottomLevelAccelerationStructurePool;

								BottomLevelAccelerationStructurePoolMember	(BottomLevelAccelerationStructurePoolImpl& pool);
								BottomLevelAccelerationStructurePoolMember	(const BottomLevelAccelerationStructurePoolMember&) = delete;
								BottomLevelAccelerationStructurePoolMember	(BottomLevelAccelerationStructurePoolMember&&) = delete;
	virtual						~BottomLevelAccelerationStructurePoolMember	() = default;

	// Pool members must not be created individually; creation is batched by the pool.
	virtual void				create										(const DeviceInterface&,
																			 const VkDevice,
																			 Allocator&,
																			 VkDeviceSize,
																			 VkDeviceAddress,
																			 const void*,
																			 const MemoryRequirement&) override
								{
									DE_ASSERT(0); // Silent this method
								}
	virtual auto				computeBuildSize							(const DeviceInterface&	vk,
																			 const VkDevice			device,
																			 const VkDeviceSize		strSize) const
																			 //              accStrSize,updateScratch, buildScratch, vertexSize,   indexSize
																			 -> std::tuple<VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize>;
protected:
	struct Info;
	virtual void				preCreateSetSizesAndOffsets					(const Info&			info,
																			 const VkDeviceSize		accStrSize,
																			 const VkDeviceSize		updateScratchSize,
																			 const VkDeviceSize		buildScratchSize);
	virtual void				createAccellerationStructure				(const DeviceInterface&	vk,
																			 const VkDevice			device,
																			 VkDeviceAddress		deviceAddress);

	// Buffer accessors resolve through the pool's shared buffers (definitions below).
	virtual BufferWithMemory*	getAccelerationStructureBuffer				() const override;
	virtual BufferWithMemory*	getDeviceScratchBuffer						() const override;
	virtual std::vector<deUint8>*	getHostScratchBuffer					() const override;
	virtual BufferWithMemory*	getVertexBuffer								() const override;
	virtual BufferWithMemory*	getIndexBuffer								() const override;

	// Byte offsets of this member's data within the shared pool buffers.
	virtual VkDeviceSize		getAccelerationStructureBufferOffset		() const override { return m_info.accStrOffset; }
	virtual VkDeviceSize		getDeviceScratchBufferOffset				() const override { return m_info.buildScratchBuffOffset; }
	virtual VkDeviceSize		getVertexBufferOffset						() const override { return m_info.vertBuffOffset; }
	virtual VkDeviceSize		getIndexBufferOffset						() const override { return m_info.indexBuffOffset; }

	BottomLevelAccelerationStructurePoolImpl&	m_pool;

	// Location of this member's resources inside the pool: a buffer index plus a
	// byte offset for each resource kind.
	struct Info
	{
		deUint32				accStrIndex;
		VkDeviceSize			accStrOffset;
		deUint32				vertBuffIndex;
		VkDeviceSize			vertBuffOffset;
		deUint32				indexBuffIndex;
		VkDeviceSize			indexBuffOffset;
		deUint32				buildScratchBuffIndex;
		VkDeviceSize			buildScratchBuffOffset;
	}											m_info;
};
1606 
// All-bits-set sentinel for the given integral type (the "no index" marker used
// by the pool bookkeeping). The argument only provides the type.
template<class X> inline X negz (const X&)
{
	return static_cast<X>(~static_cast<X>(0));
}
// True when x holds the all-bits-set sentinel produced by negz().
template<class X> inline bool isnegz (const X& x)
{
	return negz(x) == x;
}
// Converts a (possibly signed) integral value to its unsigned counterpart,
// keeping the bit pattern.
template<class Y> inline auto make_unsigned(const Y& y) -> typename std::make_unsigned<Y>::type
{
	using U = typename std::make_unsigned<Y>::type;
	return static_cast<U>(y);
}
1619 
// Members hold only a reference to the shared pool storage; the location record
// (m_info) starts zeroed and is filled in during batched creation.
BottomLevelAccelerationStructurePoolMember::BottomLevelAccelerationStructurePoolMember	(BottomLevelAccelerationStructurePoolImpl& pool)
	: m_pool	(pool)
	, m_info	{}
{
}
1625 
// Shared storage for all members of a BottomLevelAccelerationStructurePool: the
// batched buffers that individual pool members index into via their Info record.
struct BottomLevelAccelerationStructurePoolImpl
{
	BottomLevelAccelerationStructurePoolImpl (BottomLevelAccelerationStructurePoolImpl&&) = delete;
	BottomLevelAccelerationStructurePoolImpl (const BottomLevelAccelerationStructurePoolImpl&) = delete;
	BottomLevelAccelerationStructurePoolImpl (BottomLevelAccelerationStructurePool& pool);

	BottomLevelAccelerationStructurePool&			m_pool;								// back-reference to the owning pool
	std::vector<de::SharedPtr<BufferWithMemory>>	m_accellerationStructureBuffers;	// batched acceleration-structure storage
	de::SharedPtr<BufferWithMemory>					m_deviceScratchBuffer;				// single scratch buffer shared by all members
	de::UniquePtr<std::vector<deUint8>>				m_hostScratchBuffer;				// scratch area for host-side builds
	std::vector<de::SharedPtr<BufferWithMemory>>	m_vertexBuffers;					// batched vertex data
	std::vector<de::SharedPtr<BufferWithMemory>>	m_indexBuffers;						// batched index data
};
// The host scratch vector always exists (initially empty); the device-side
// buffers start out null and are created on demand.
BottomLevelAccelerationStructurePoolImpl::BottomLevelAccelerationStructurePoolImpl (BottomLevelAccelerationStructurePool& pool)
	: m_pool							(pool)
	, m_accellerationStructureBuffers	()
	, m_deviceScratchBuffer				()
	, m_hostScratchBuffer				(new std::vector<deUint8>)
	, m_vertexBuffers					()
	, m_indexBuffers					()
{
}
getAccelerationStructureBuffer() const1648 BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getAccelerationStructureBuffer () const
1649 {
1650 	BufferWithMemory* result = nullptr;
1651 	if (m_pool.m_accellerationStructureBuffers.size())
1652 	{
1653 		DE_ASSERT(!isnegz(m_info.accStrIndex));
1654 		result = m_pool.m_accellerationStructureBuffers[m_info.accStrIndex].get();
1655 	}
1656 	return result;
1657 }
// The pool keeps exactly one device scratch buffer, so a member's scratch index
// must always be zero.
BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getDeviceScratchBuffer () const
{
	DE_ASSERT(m_info.buildScratchBuffIndex == 0);
	return m_pool.m_deviceScratchBuffer.get();
}
getHostScratchBuffer() const1663 std::vector<deUint8>* BottomLevelAccelerationStructurePoolMember::getHostScratchBuffer () const
1664 {
1665 	return this->m_buildScratchSize ? m_pool.m_hostScratchBuffer.get() : nullptr;
1666 }
1667 
getVertexBuffer() const1668 BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getVertexBuffer () const
1669 {
1670 	BufferWithMemory* result = nullptr;
1671 	if (m_pool.m_vertexBuffers.size())
1672 	{
1673 		DE_ASSERT(!isnegz(m_info.vertBuffIndex));
1674 		result = m_pool.m_vertexBuffers[m_info.vertBuffIndex].get();
1675 	}
1676 	return result;
1677 }
getIndexBuffer() const1678 BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getIndexBuffer () const
1679 {
1680 	BufferWithMemory* result = nullptr;
1681 	if (m_pool.m_indexBuffers.size())
1682 	{
1683 		DE_ASSERT(!isnegz(m_info.indexBuffIndex));
1684 		result = m_pool.m_indexBuffers[m_info.indexBuffIndex].get();
1685 	}
1686 	return result;
1687 }
1688 
// Private implementation type: simply re-exposes BottomLevelAccelerationStructurePoolImpl
// under the pool's nested Impl name.
struct BottomLevelAccelerationStructurePool::Impl : BottomLevelAccelerationStructurePoolImpl
{
	friend class BottomLevelAccelerationStructurePool;
	friend class BottomLevelAccelerationStructurePoolMember;

	Impl (BottomLevelAccelerationStructurePool& pool)
		: BottomLevelAccelerationStructurePoolImpl(pool) { }
};
1697 
// All sizes and batching counters start from their defaults; the pooled
// resources themselves are created later through the Impl object.
BottomLevelAccelerationStructurePool::BottomLevelAccelerationStructurePool ()
	: m_batchStructCount	(4)		// default number of structures batched into one buffer
	, m_batchGeomCount		(0)		// 0: fall back to m_batchStructCount for vertex/index batching
	, m_infos				()
	, m_structs				()
	, m_createOnce			(false)	// once set, add() must no longer be called
	, m_tryCachedMemory		(true)
	, m_structsBuffSize		(0)
	, m_updatesScratchSize	(0)
	, m_buildsScratchSize	(0)
	, m_verticesSize		(0)
	, m_indicesSize			(0)
	, m_impl				(new Impl(*this))	// raw owning pointer, released in the destructor
{
}
1713 
// m_impl is a raw owning pointer (created in the constructor); release it here.
BottomLevelAccelerationStructurePool::~BottomLevelAccelerationStructurePool()
{
	delete m_impl;
}
1718 
batchStructCount(const deUint32 & value)1719 void BottomLevelAccelerationStructurePool::batchStructCount (const deUint32& value)
1720 {
1721 	DE_ASSERT(value >= 1); m_batchStructCount = value;
1722 }
1723 
add(VkDeviceSize structureSize,VkDeviceAddress deviceAddress)1724 auto BottomLevelAccelerationStructurePool::add (VkDeviceSize		structureSize,
1725 												VkDeviceAddress		deviceAddress) -> BottomLevelAccelerationStructurePool::BlasPtr
1726 {
1727 	// Prevent a programmer from calling this method after batchCreate(...) method has been called.
1728 	if (m_createOnce) DE_ASSERT(0);
1729 
1730 	auto blas = new BottomLevelAccelerationStructurePoolMember(*m_impl);
1731 	m_infos.push_back({structureSize, deviceAddress});
1732 	m_structs.emplace_back(blas);
1733 	return m_structs.back();
1734 }
1735 
// Computes, for each of the four pooled buffer kinds (0: acceleration-structure
// storage, 1: build scratch, 2: vertices, 3: indices), how many consecutive
// structures can share one buffer without that buffer exceeding maxBufferSize.
// The result per kind is the longest fitting run observed, never less than 1.
void adjustBatchCount (const DeviceInterface&		vkd,
					   const VkDevice				device,
					   const std::vector<BottomLevelAccelerationStructurePool::BlasPtr>& structs,
					   const std::vector<BottomLevelAccelerationStructurePool::BlasInfo>& infos,
					   const VkDeviceSize			maxBufferSize,
					   deUint32						(&result)[4])
{
	tcu::Vector<VkDeviceSize, 4>	sizes(0);	// per-kind size of the current structure
	tcu::Vector<VkDeviceSize, 4>	sums(0);	// running per-kind sum of the current run
	tcu::Vector<deUint32, 4>		tmps(0);	// length of the current run
	tcu::Vector<deUint32, 4>		batches(0);	// longest run seen so far

	VkDeviceSize	updateScratchSize = 0;	static_cast<void>(updateScratchSize);	// not used yet, disabled for future implementation

	// Extend the current run for component c while the next structure still fits,
	// otherwise start a fresh run.
	// NOTE(review): on overflow the running sum and count are reset to zero without
	// accounting for the current structure's size — confirm this is intended.
	auto updateIf = [&](deUint32 c)
	{
		if (sums[c] + sizes[c] <= maxBufferSize)
		{
			sums[c] += sizes[c];
			tmps[c] += 1;

			batches[c] = std::max(tmps[c], batches[c]);
		}
		else
		{
			sums[c] = 0;
			tmps[c] = 0;
		}
	};

	const deUint32	maxIter	= static_cast<deUint32>(structs.size());
	for (deUint32 i = 0; i < maxIter; ++i)
	{
		auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(structs[i].get());
		std::tie(sizes[0], updateScratchSize, sizes[1], sizes[2], sizes[3]) = str.computeBuildSize(vkd, device, infos[i].structureSize);

		updateIf(0);
		updateIf(1);
		updateIf(2);
		updateIf(3);
	}

	// Guarantee at least one structure per batch.
	result[0] = std::max(batches[0], 1u);
	result[1] = std::max(batches[1], 1u);
	result[2] = std::max(batches[2], 1u);
	result[3] = std::max(batches[3], 1u);
}
1783 
getAllocationCount() const1784 size_t BottomLevelAccelerationStructurePool::getAllocationCount () const
1785 {
1786 	return m_impl->m_accellerationStructureBuffers.size()
1787 			+ m_impl->m_vertexBuffers.size()
1788 			+ m_impl->m_indexBuffers.size()
1789 			+ 1 /* for scratch buffer */;
1790 }
1791 
// Predicts how many buffer allocations the pool will need when each pooled
// buffer is limited to maxBufferSize bytes: walks all registered structures,
// bins their per-kind sizes into batches and counts the resulting buffers
// (acceleration structures + vertices + indices + scratch).
size_t BottomLevelAccelerationStructurePool::getAllocationCount (const DeviceInterface&		vk,
																 const VkDevice				device,
																 const VkDeviceSize			maxBufferSize) const
{
	DE_ASSERT(m_structs.size() != 0);

	// Accumulated byte size per resulting buffer, keyed by batch index.
	std::map<deUint32, VkDeviceSize>	accStrSizes;
	std::map<deUint32, VkDeviceSize>	vertBuffSizes;
	std::map<deUint32, VkDeviceSize>	indexBuffSizes;
	std::map<deUint32, VkDeviceSize>	scratchBuffSizes;

	const deUint32	allStructsCount		= structCount();

	// Default batch sizes; m_batchGeomCount == 0 falls back to m_batchStructCount.
	deUint32		batchStructCount	= m_batchStructCount;
	deUint32		batchScratchCount	= m_batchStructCount;
	deUint32		batchVertexCount	= m_batchGeomCount ? m_batchGeomCount : m_batchStructCount;
	deUint32		batchIndexCount		= batchVertexCount;

	// A real size limit (anything but the all-bits-set sentinel) overrides the
	// configured batch counts.
	if (!isnegz(maxBufferSize))
	{
		deUint32	batches[4];
		adjustBatchCount(vk, device, m_structs, m_infos, maxBufferSize, batches);
		batchStructCount	= batches[0];
		batchScratchCount	= batches[1];
		batchVertexCount	= batches[2];
		batchIndexCount		= batches[3];
	}

	// Running counts of structures that actually contribute each resource kind.
	deUint32		iStr				= 0;
	deUint32		iScratch			= 0;
	deUint32		iVertex				= 0;
	deUint32		iIndex				= 0;

	VkDeviceSize	strSize				= 0;
	VkDeviceSize	updateScratchSize	= 0;
	VkDeviceSize	buildScratchSize	= 0;
	VkDeviceSize	vertexSize			= 0;
	VkDeviceSize	indexSize			= 0;

	for (; iStr < allStructsCount; ++iStr)
	{
		auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[iStr].get());
		std::tie(strSize, updateScratchSize, buildScratchSize, vertexSize, indexSize) = str.computeBuildSize(vk, device, m_infos[iStr].structureSize);

		// Acceleration-structure storage, 256-byte aligned inside its batch buffer.
		{
			const VkDeviceSize	alignedStrSize	= deAlign64(strSize, 256);
			const deUint32		accStrIndex		= (iStr / batchStructCount);
			accStrSizes[accStrIndex]	+= alignedStrSize;
		}

		// Scratch space, only for structures reporting a non-zero build scratch size.
		if (buildScratchSize != 0)
		{
			const VkDeviceSize	alignedBuilsScratchSize	= deAlign64(buildScratchSize, 256);
			const deUint32		scratchBuffIndex		= (iScratch/ batchScratchCount);
			scratchBuffSizes[scratchBuffIndex]	+= alignedBuilsScratchSize;
			iScratch							+= 1;
		}

		// Vertex data, 8-byte aligned inside its batch buffer.
		if (vertexSize != 0)
		{
			const VkDeviceSize	alignedVertBuffSize	= deAlign64(vertexSize, 8);
			const deUint32		vertBuffIndex		= (iVertex / batchVertexCount);
			vertBuffSizes[vertBuffIndex]	+= alignedVertBuffSize;
			iVertex							+= 1;
		}

		// Index data, 8-byte aligned inside its batch buffer.
		if (indexSize != 0)
		{
			const VkDeviceSize	alignedIndexBuffSize	= deAlign64(indexSize, 8);
			const deUint32		indexBuffIndex			= (iIndex / batchIndexCount);
			indexBuffSizes[indexBuffIndex]	+= alignedIndexBuffSize;
			iIndex							+= 1;
		}
	}

	// One allocation per distinct batch buffer of each kind.
	return accStrSizes.size()
			+ vertBuffSizes.size()
			+ indexBuffSizes.size()
			+ scratchBuffSizes.size();
}
1872 
getAllocationSizes(const DeviceInterface & vk,const VkDevice device) const1873 tcu::Vector<VkDeviceSize, 4> BottomLevelAccelerationStructurePool::getAllocationSizes (const DeviceInterface&		vk,
1874 																					   const VkDevice				device) const
1875 {
1876 	if (m_structsBuffSize)
1877 	{
1878 		return tcu::Vector<VkDeviceSize, 4>(m_structsBuffSize, m_buildsScratchSize, m_verticesSize, m_indicesSize);
1879 	}
1880 
1881 	VkDeviceSize strSize				= 0;
1882 	VkDeviceSize updateScratchSize		= 0;	static_cast<void>(updateScratchSize);		// not used yet, disabled for future implementation
1883 	VkDeviceSize buildScratchSize		= 0;
1884 	VkDeviceSize vertexSize				= 0;
1885 	VkDeviceSize indexSize				= 0;
1886 	VkDeviceSize sumStrSize				= 0;
1887 	VkDeviceSize sumUpdateScratchSize	= 0;	static_cast<void>(sumUpdateScratchSize);	// not used yet, disabled for future implementation
1888 	VkDeviceSize sumBuildScratchSize	= 0;
1889 	VkDeviceSize sumVertexSize			= 0;
1890 	VkDeviceSize sumIndexSize			= 0;
1891 	for (size_t i = 0; i < structCount(); ++i)
1892 	{
1893 		auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[i].get());
1894 		std::tie(strSize, updateScratchSize, buildScratchSize, vertexSize, indexSize) = str.computeBuildSize(vk, device, m_infos[i].structureSize);
1895 		sumStrSize				+= deAlign64(strSize, 256);
1896 		//sumUpdateScratchSize	+= deAlign64(updateScratchSize, 256);	not used yet, disabled for future implementation
1897 		sumBuildScratchSize		+= deAlign64(buildScratchSize, 256);
1898 		sumVertexSize			+= deAlign64(vertexSize, 8);
1899 		sumIndexSize			+= deAlign64(indexSize, 8);
1900 	}
1901 	return tcu::Vector<VkDeviceSize, 4>(sumStrSize, sumBuildScratchSize, sumVertexSize, sumIndexSize);
1902 }
1903 
batchCreate(const DeviceInterface & vkd,const VkDevice device,Allocator & allocator)1904 void BottomLevelAccelerationStructurePool::batchCreate (const DeviceInterface&		vkd,
1905 														const VkDevice				device,
1906 														Allocator&					allocator)
1907 {
1908 	batchCreateAdjust(vkd, device, allocator, negz<VkDeviceSize>(0));
1909 }
1910 
// Allocates the shared backing buffers and creates every acceleration
// structure previously added to the pool. Structures are packed in batches
// into a small number of buffers; each member is told its buffer index and
// byte offset before the VkAccelerationStructureKHR objects are created.
// May only be called once per pool instance.
void BottomLevelAccelerationStructurePool::batchCreateAdjust (const DeviceInterface&	vkd,
															  const VkDevice			device,
															  Allocator&				allocator,
															  const VkDeviceSize		maxBufferSize)
{
	// Prevent a programmer from calling this method more than once.
	if (m_createOnce) DE_ASSERT(0);

	m_createOnce = true;
	DE_ASSERT(m_structs.size() != 0);

	// Allocates one backing buffer for a batch of acceleration structures.
	// Prefers cached host-visible memory when m_tryCachedMemory is set and
	// falls back to plain host-visible/coherent memory if not supported.
	auto createAccellerationStructureBuffer = [&](VkDeviceSize bufferSize) -> typename std::add_pointer<BufferWithMemory>::type
	{
		BufferWithMemory* res = nullptr;
		const VkBufferCreateInfo bci = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);

		if (m_tryCachedMemory) try
		{
			res = new BufferWithMemory(vkd, device, allocator, bci, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
		}
		catch (const tcu::NotSupportedError&)
		{
			res = nullptr;
		}

		return (nullptr != res)
				? res
				: (new BufferWithMemory(vkd, device, allocator, bci, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
	};

	// Creates the single scratch buffer shared by all device-side builds.
	auto createDeviceScratchBuffer = [&](VkDeviceSize bufferSize) -> de::SharedPtr<BufferWithMemory>
	{
		const VkBufferCreateInfo bci = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
		BufferWithMemory* p = new BufferWithMemory(vkd, device, allocator, bci, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
		return de::SharedPtr<BufferWithMemory>(p);
	};

	// buffer index -> accumulated (aligned) size for each buffer category
	std::map<deUint32, VkDeviceSize>	accStrSizes;
	std::map<deUint32, VkDeviceSize>	vertBuffSizes;
	std::map<deUint32, VkDeviceSize>	indexBuffSizes;

	const deUint32	allStructsCount		= structCount();
	deUint32		iterKey				= 0;

	deUint32		batchStructCount	= m_batchStructCount;
	deUint32		batchVertexCount	= m_batchGeomCount ? m_batchGeomCount : m_batchStructCount;
	deUint32		batchIndexCount		= batchVertexCount;

	// If a maximum buffer size was supplied, shrink the batch counts so that
	// no single buffer exceeds that size.
	if (!isnegz(maxBufferSize))
	{
		deUint32	batches[4];
		adjustBatchCount(vkd, device, m_structs, m_infos, maxBufferSize, batches);
		batchStructCount	= batches[0];
		// batches[1]: batchScratchCount
		batchVertexCount	= batches[2];
		batchIndexCount		= batches[3];
	}

	// Running counters; vertex/index advance only for structures that actually
	// carry data of that category.
	deUint32		iStr				= 0;
	deUint32		iVertex				= 0;
	deUint32		iIndex				= 0;

	VkDeviceSize	strSize				= 0;
	VkDeviceSize	updateScratchSize	= 0;
	VkDeviceSize	buildScratchSize	= 0;
	VkDeviceSize	maxBuildScratchSize	= 0;
	VkDeviceSize	vertexSize			= 0;
	VkDeviceSize	indexSize			= 0;

	// Byte offsets within the current buffer of each category; reset whenever
	// a new batch buffer begins.
	VkDeviceSize	strOffset			= 0;
	VkDeviceSize	vertexOffset		= 0;
	VkDeviceSize	indexOffset			= 0;

	deUint32		hostStructCount		= 0;
	deUint32		deviceStructCount	= 0;

	// First pass: compute per-structure sizes, assign each structure its
	// buffer index/offset, and accumulate per-buffer and pool-wide totals.
	for (; iStr < allStructsCount; ++iStr)
	{
		BottomLevelAccelerationStructurePoolMember::Info info{};
		auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[iStr].get());
		std::tie(strSize, updateScratchSize, buildScratchSize, vertexSize, indexSize) = str.computeBuildSize(vkd, device, m_infos[iStr].structureSize);

		++(str.getBuildType() == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR ? hostStructCount : deviceStructCount);

		{
			const VkDeviceSize	alignedStrSize	= deAlign64(strSize, 256);
			const deUint32		accStrIndex		= (iStr / batchStructCount);
			// Crossing into a new batch buffer: restart the running offset.
			if (iStr != 0 && (iStr % batchStructCount) == 0)
			{
				strOffset				= 0;
			}

			info.accStrIndex			= accStrIndex;
			info.accStrOffset			= strOffset;
			accStrSizes[accStrIndex]	+= alignedStrSize;
			strOffset					+= alignedStrSize;
			m_structsBuffSize			+= alignedStrSize;
		}

		if (buildScratchSize != 0)
		{
			// One scratch buffer is shared by all builds, so only the largest
			// required scratch size matters.
			maxBuildScratchSize = std::max(maxBuildScratchSize, make_unsigned(deAlign64(buildScratchSize, 256u)));

			info.buildScratchBuffIndex		= 0;
			info.buildScratchBuffOffset		= 0;
		}

		if (vertexSize != 0)
		{
			const VkDeviceSize	alignedVertBuffSize	= deAlign64(vertexSize, 8);
			const deUint32		vertBuffIndex		= (iVertex / batchVertexCount);
			if (iVertex != 0 && (iVertex % batchVertexCount) == 0)
			{
				vertexOffset				= 0;
			}

			info.vertBuffIndex				= vertBuffIndex;
			info.vertBuffOffset				= vertexOffset;
			vertBuffSizes[vertBuffIndex]	+= alignedVertBuffSize;
			vertexOffset					+= alignedVertBuffSize;
			m_verticesSize					+= alignedVertBuffSize;
			iVertex							+= 1;
		}

		if (indexSize != 0)
		{
			const VkDeviceSize	alignedIndexBuffSize	= deAlign64(indexSize, 8);
			const deUint32		indexBuffIndex			= (iIndex / batchIndexCount);
			if (iIndex != 0 && (iIndex % batchIndexCount) == 0)
			{
				indexOffset					= 0;
			}

			info.indexBuffIndex				= indexBuffIndex;
			info.indexBuffOffset			= indexOffset;
			indexBuffSizes[indexBuffIndex]	+= alignedIndexBuffSize;
			indexOffset						+= alignedIndexBuffSize;
			m_indicesSize					+= alignedIndexBuffSize;
			iIndex							+= 1;
		}

		str.preCreateSetSizesAndOffsets(info, strSize, updateScratchSize, buildScratchSize);
	}

	// Second pass: allocate the shared buffers sized above.
	for (iterKey = 0; iterKey < static_cast<deUint32>(accStrSizes.size()); ++iterKey)
	{
		m_impl->m_accellerationStructureBuffers.emplace_back(createAccellerationStructureBuffer(accStrSizes.at(iterKey)));
	}
	for (iterKey = 0; iterKey < static_cast<deUint32>(vertBuffSizes.size()); ++iterKey)
	{
		m_impl->m_vertexBuffers.emplace_back(createVertexBuffer(vkd, device, allocator, vertBuffSizes.at(iterKey)));
	}
	for (iterKey = 0; iterKey < static_cast<deUint32>(indexBuffSizes.size()); ++iterKey)
	{
		m_impl->m_indexBuffers.emplace_back(createIndexBuffer(vkd, device, allocator, indexBuffSizes.at(iterKey)));
	}

	// Host and device builds use separate scratch storage; allocate only what
	// is needed for the build types actually present.
	if (maxBuildScratchSize)
	{
		if (hostStructCount)	m_impl->m_hostScratchBuffer->resize(static_cast<size_t>(maxBuildScratchSize));
		if (deviceStructCount)	m_impl->m_deviceScratchBuffer = createDeviceScratchBuffer(maxBuildScratchSize);

		m_buildsScratchSize = maxBuildScratchSize;
	}

	// Finally create the VkAccelerationStructureKHR objects now that every
	// member knows its backing buffer and offset.
	for (iterKey = 0; iterKey < allStructsCount; ++iterKey)
	{
		auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[iterKey].get());
		str.createAccellerationStructure(vkd, device, m_infos[iterKey].deviceAddress);
	}
}
2082 
batchBuild(const DeviceInterface & vk,const VkDevice device,VkCommandBuffer cmdBuffer)2083 void BottomLevelAccelerationStructurePool::batchBuild (const DeviceInterface&	vk,
2084 													   const VkDevice			device,
2085 													   VkCommandBuffer			cmdBuffer)
2086 {
2087 	for (const auto& str : m_structs)
2088 	{
2089 		str->build(vk, device, cmdBuffer);
2090 	}
2091 }
2092 
batchBuild(const DeviceInterface & vk,const VkDevice device,VkCommandPool cmdPool,VkQueue queue,qpWatchDog * watchDog)2093 void BottomLevelAccelerationStructurePool::batchBuild (const DeviceInterface&	vk,
2094 													   const VkDevice			device,
2095 													   VkCommandPool			cmdPool,
2096 													   VkQueue					queue,
2097 													   qpWatchDog*				watchDog)
2098 {
2099 	const deUint32			limit	= 10000u;
2100 	const deUint32			count	= structCount();
2101 	std::vector<BlasPtr>	buildingOnDevice;
2102 
2103 	auto buildOnDevice = [&]() -> void
2104 	{
2105 		Move<VkCommandBuffer>	cmd = allocateCommandBuffer(vk, device, cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2106 
2107 		beginCommandBuffer(vk, *cmd, 0u);
2108 			for (const auto& str : buildingOnDevice)
2109 				str->build(vk, device, *cmd);
2110 		endCommandBuffer(vk, *cmd);
2111 
2112 		submitCommandsAndWait(vk, device, queue, *cmd);
2113 		vk.resetCommandPool(device, cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
2114 	};
2115 
2116 	buildingOnDevice.reserve(limit);
2117 	for (deUint32 i = 0; i < count; ++i)
2118 	{
2119 		auto str = m_structs[i];
2120 
2121 		if (str->getBuildType() == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR)
2122 			str->build(vk, device, DE_NULL);
2123 		else
2124 			buildingOnDevice.emplace_back(str);
2125 
2126 		if ( buildingOnDevice.size() == limit || (count - 1) == i)
2127 		{
2128 			buildOnDevice();
2129 			buildingOnDevice.clear();
2130 		}
2131 
2132 		if ((i % WATCHDOG_INTERVAL) == 0 && watchDog)
2133 			qpWatchDog_touch(watchDog);
2134 	}
2135 }
2136 
// Queries the sizes this pool member will need at build time. Returns the
// tuple { accelerationStructureSize, updateScratchSize, buildScratchSize,
// vertexBufferSize, indexBufferSize }. Exactly one of m_geometriesData /
// strSize must be provided: either the geometry is known and the sizes come
// from vkGetAccelerationStructureBuildSizesKHR, or a pre-known structure size
// is passed in directly (scratch/vertex/index sizes then remain zero).
auto BottomLevelAccelerationStructurePoolMember::computeBuildSize (const DeviceInterface&	vk,
																   const VkDevice			device,
																   const VkDeviceSize		strSize) const
																   //              accStrSize,updateScratch,buildScratch, vertexSize, indexSize
																   -> std::tuple<VkDeviceSize, VkDeviceSize,VkDeviceSize,VkDeviceSize,VkDeviceSize>
{
	DE_ASSERT(!m_geometriesData.empty() !=  !(strSize == 0)); // logical xor

	// Default result: the caller-provided structure size (256-aligned), no scratch/vertex/index data.
	std::tuple<VkDeviceSize,VkDeviceSize,VkDeviceSize,VkDeviceSize,VkDeviceSize> result(deAlign64(strSize, 256), 0, 0, 0, 0);

	if (!m_geometriesData.empty())
	{
		std::vector<VkAccelerationStructureGeometryKHR>			accelerationStructureGeometriesKHR;
		std::vector<VkAccelerationStructureGeometryKHR*>		accelerationStructureGeometriesKHRPointers;
		std::vector<VkAccelerationStructureBuildRangeInfoKHR>	accelerationStructureBuildRangeInfoKHR;
		std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT> accelerationStructureGeometryMicromapsEXT;
		std::vector<deUint32>									maxPrimitiveCounts;
		prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers, accelerationStructureBuildRangeInfoKHR, accelerationStructureGeometryMicromapsEXT, maxPrimitiveCounts);

		const VkAccelerationStructureGeometryKHR*				accelerationStructureGeometriesKHRPointer	= accelerationStructureGeometriesKHR.data();
		const VkAccelerationStructureGeometryKHR* const*		accelerationStructureGeometry				= accelerationStructureGeometriesKHRPointers.data();

		// Only one of pGeometries / ppGeometries may be non-null, selected by
		// the m_useArrayOfPointers mode.
		VkAccelerationStructureBuildGeometryInfoKHR	accelerationStructureBuildGeometryInfoKHR	=
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,			//  VkStructureType										sType;
			DE_NULL,																	//  const void*											pNext;
			VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,							//  VkAccelerationStructureTypeKHR						type;
			m_buildFlags,																//  VkBuildAccelerationStructureFlagsKHR				flags;
			VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,								//  VkBuildAccelerationStructureModeKHR					mode;
			DE_NULL,																	//  VkAccelerationStructureKHR							srcAccelerationStructure;
			DE_NULL,																	//  VkAccelerationStructureKHR							dstAccelerationStructure;
			static_cast<deUint32>(accelerationStructureGeometriesKHR.size()),			//  deUint32											geometryCount;
			m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer,	//  const VkAccelerationStructureGeometryKHR*			pGeometries;
			m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL,				//  const VkAccelerationStructureGeometryKHR* const*	ppGeometries;
			makeDeviceOrHostAddressKHR(DE_NULL)											//  VkDeviceOrHostAddressKHR							scratchData;
		};

		VkAccelerationStructureBuildSizesInfoKHR sizeInfo =
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,	//  VkStructureType	sType;
			DE_NULL,														//  const void*		pNext;
			0,																//  VkDeviceSize	accelerationStructureSize;
			0,																//  VkDeviceSize	updateScratchSize;
			0																//  VkDeviceSize	buildScratchSize;
		};

		vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);

		std::get<0>(result) = sizeInfo.accelerationStructureSize;
		std::get<1>(result) = sizeInfo.updateScratchSize;
		std::get<2>(result) = sizeInfo.buildScratchSize;
		std::get<3>(result) = getVertexBufferSize(m_geometriesData);
		std::get<4>(result) = getIndexBufferSize(m_geometriesData);
	}

	return result;
}
2194 
preCreateSetSizesAndOffsets(const Info & info,const VkDeviceSize accStrSize,const VkDeviceSize updateScratchSize,const VkDeviceSize buildScratchSize)2195 void BottomLevelAccelerationStructurePoolMember::preCreateSetSizesAndOffsets (const Info&			info,
2196 																			  const VkDeviceSize	accStrSize,
2197 																			  const VkDeviceSize	updateScratchSize,
2198 																			  const VkDeviceSize	buildScratchSize)
2199 {
2200 	m_info				= info;
2201 	m_structureSize		= accStrSize;
2202 	m_updateScratchSize	= updateScratchSize;
2203 	m_buildScratchSize	= buildScratchSize;
2204 }
2205 
createAccellerationStructure(const DeviceInterface & vk,const VkDevice device,VkDeviceAddress deviceAddress)2206 void BottomLevelAccelerationStructurePoolMember::createAccellerationStructure (const DeviceInterface&	vk,
2207 																			   const VkDevice			device,
2208 																			   VkDeviceAddress			deviceAddress)
2209 {
2210 	const VkAccelerationStructureTypeKHR		structureType						= (m_createGeneric
2211 																					   ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
2212 																					   : VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
2213 	const VkAccelerationStructureCreateInfoKHR	accelerationStructureCreateInfoKHR
2214 	{
2215 		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,						//  VkStructureType											sType;
2216 		DE_NULL,																		//  const void*												pNext;
2217 		m_createFlags,																	//  VkAccelerationStructureCreateFlagsKHR					createFlags;
2218 		getAccelerationStructureBuffer()->get(),										//  VkBuffer												buffer;
2219 		getAccelerationStructureBufferOffset(),											//  VkDeviceSize											offset;
2220 		m_structureSize,																//  VkDeviceSize											size;
2221 		structureType,																	//  VkAccelerationStructureTypeKHR							type;
2222 		deviceAddress																	//  VkDeviceAddress											deviceAddress;
2223 	};
2224 
2225 	m_accelerationStructureKHR	= createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);
2226 }
2227 
~TopLevelAccelerationStructure()2228 TopLevelAccelerationStructure::~TopLevelAccelerationStructure ()
2229 {
2230 }
2231 
TopLevelAccelerationStructure()2232 TopLevelAccelerationStructure::TopLevelAccelerationStructure ()
2233 	: m_structureSize		(0u)
2234 	, m_updateScratchSize	(0u)
2235 	, m_buildScratchSize	(0u)
2236 {
2237 }
2238 
setInstanceCount(const size_t instanceCount)2239 void TopLevelAccelerationStructure::setInstanceCount (const size_t instanceCount)
2240 {
2241 	m_bottomLevelInstances.reserve(instanceCount);
2242 	m_instanceData.reserve(instanceCount);
2243 }
2244 
addInstance(de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelStructure,const VkTransformMatrixKHR & matrix,deUint32 instanceCustomIndex,deUint32 mask,deUint32 instanceShaderBindingTableRecordOffset,VkGeometryInstanceFlagsKHR flags)2245 void TopLevelAccelerationStructure::addInstance (de::SharedPtr<BottomLevelAccelerationStructure>	bottomLevelStructure,
2246 												 const VkTransformMatrixKHR&						matrix,
2247 												 deUint32											instanceCustomIndex,
2248 												 deUint32											mask,
2249 												 deUint32											instanceShaderBindingTableRecordOffset,
2250 												 VkGeometryInstanceFlagsKHR							flags)
2251 {
2252 	m_bottomLevelInstances.push_back(bottomLevelStructure);
2253 	m_instanceData.push_back(InstanceData(matrix, instanceCustomIndex, mask, instanceShaderBindingTableRecordOffset, flags));
2254 }
2255 
getStructureBuildSizes() const2256 VkAccelerationStructureBuildSizesInfoKHR TopLevelAccelerationStructure::getStructureBuildSizes () const
2257 {
2258 	return
2259 	{
2260 		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,	//  VkStructureType	sType;
2261 		DE_NULL,														//  const void*		pNext;
2262 		m_structureSize,												//  VkDeviceSize	accelerationStructureSize;
2263 		m_updateScratchSize,											//  VkDeviceSize	updateScratchSize;
2264 		m_buildScratchSize												//  VkDeviceSize	buildScratchSize;
2265 	};
2266 }
2267 
createAndBuild(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,Allocator & allocator,VkDeviceAddress deviceAddress)2268 void TopLevelAccelerationStructure::createAndBuild (const DeviceInterface&	vk,
2269 													const VkDevice			device,
2270 													const VkCommandBuffer	cmdBuffer,
2271 													Allocator&				allocator,
2272 													VkDeviceAddress			deviceAddress)
2273 {
2274 	create(vk, device, allocator, 0u, deviceAddress);
2275 	build(vk, device, cmdBuffer);
2276 }
2277 
createAndCopyFrom(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,Allocator & allocator,TopLevelAccelerationStructure * accelerationStructure,VkDeviceSize compactCopySize,VkDeviceAddress deviceAddress)2278 void TopLevelAccelerationStructure::createAndCopyFrom (const DeviceInterface&				vk,
2279 													   const VkDevice						device,
2280 													   const VkCommandBuffer				cmdBuffer,
2281 													   Allocator&							allocator,
2282 													   TopLevelAccelerationStructure*		accelerationStructure,
2283 													   VkDeviceSize							compactCopySize,
2284 													   VkDeviceAddress						deviceAddress)
2285 {
2286 	DE_ASSERT(accelerationStructure != NULL);
2287 	VkDeviceSize copiedSize = compactCopySize > 0u ? compactCopySize : accelerationStructure->getStructureBuildSizes().accelerationStructureSize;
2288 	DE_ASSERT(copiedSize != 0u);
2289 
2290 	create(vk, device, allocator, copiedSize, deviceAddress);
2291 	copyFrom(vk, device, cmdBuffer, accelerationStructure, compactCopySize > 0u);
2292 }
2293 
createAndDeserializeFrom(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,Allocator & allocator,SerialStorage * storage,VkDeviceAddress deviceAddress)2294 void TopLevelAccelerationStructure::createAndDeserializeFrom (const DeviceInterface&					vk,
2295 															  const VkDevice							device,
2296 															  const VkCommandBuffer						cmdBuffer,
2297 															  Allocator&								allocator,
2298 															  SerialStorage*							storage,
2299 															  VkDeviceAddress							deviceAddress)
2300 {
2301 	DE_ASSERT(storage != NULL);
2302 	DE_ASSERT(storage->getStorageSize() >= SerialStorage::SERIAL_STORAGE_SIZE_MIN);
2303 	create(vk, device, allocator, storage->getDeserializedSize(), deviceAddress);
2304 	if (storage->hasDeepFormat()) createAndDeserializeBottoms(vk, device, cmdBuffer, allocator, storage);
2305 	deserialize(vk, device, cmdBuffer, storage);
2306 }
2307 
createInstanceBuffer(const DeviceInterface & vk,const VkDevice device,Allocator & allocator,std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> bottomLevelInstances,std::vector<InstanceData> instanceData,const bool tryCachedMemory)2308 BufferWithMemory* createInstanceBuffer (const DeviceInterface&											vk,
2309 										const VkDevice													device,
2310 										Allocator&														allocator,
2311 										std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >	bottomLevelInstances,
2312 										std::vector<InstanceData>										instanceData,
2313 										const bool														tryCachedMemory)
2314 {
2315 	DE_ASSERT(bottomLevelInstances.size() != 0);
2316 	DE_ASSERT(bottomLevelInstances.size() == instanceData.size());
2317 	DE_UNREF(instanceData);
2318 
2319 	BufferWithMemory*			result				= nullptr;
2320 	const VkDeviceSize			bufferSizeBytes		= bottomLevelInstances.size() * sizeof(VkAccelerationStructureInstanceKHR);
2321 	const VkBufferCreateInfo	bufferCreateInfo	= makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
2322 	if (tryCachedMemory) try
2323 	{
2324 		result = new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
2325 	}
2326 	catch (const tcu::NotSupportedError&)
2327 	{
2328 		result = nullptr;
2329 	}
2330 	return result
2331 			? result
2332 			: new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
2333 }
2334 
updateSingleInstance(const DeviceInterface & vk,const VkDevice device,const BottomLevelAccelerationStructure & bottomLevelAccelerationStructure,const InstanceData & instanceData,deUint8 * bufferLocation,VkAccelerationStructureBuildTypeKHR buildType,bool inactiveInstances)2335 void updateSingleInstance (const DeviceInterface&					vk,
2336 						   const VkDevice							device,
2337 						   const BottomLevelAccelerationStructure&	bottomLevelAccelerationStructure,
2338 						   const InstanceData&						instanceData,
2339 						   deUint8*									bufferLocation,
2340 						   VkAccelerationStructureBuildTypeKHR		buildType,
2341 						   bool										inactiveInstances)
2342 {
2343 	const VkAccelerationStructureKHR accelerationStructureKHR = *bottomLevelAccelerationStructure.getPtr();
2344 
2345 	// This part needs to be fixed once a new version of the VkAccelerationStructureInstanceKHR will be added to vkStructTypes.inl
2346 	VkDeviceAddress accelerationStructureAddress;
2347 	if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2348 	{
2349 		VkAccelerationStructureDeviceAddressInfoKHR asDeviceAddressInfo =
2350 		{
2351 			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,	// VkStructureType				sType;
2352 			DE_NULL,															// const void*					pNext;
2353 			accelerationStructureKHR											// VkAccelerationStructureKHR	accelerationStructure;
2354 		};
2355 		accelerationStructureAddress = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
2356 	}
2357 
2358 	deUint64 structureReference;
2359 	if (inactiveInstances)
2360 	{
2361 		// Instances will be marked inactive by making their references VK_NULL_HANDLE or having address zero.
2362 		structureReference = 0ull;
2363 	}
2364 	else
2365 	{
2366 		structureReference	= (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2367 							? deUint64(accelerationStructureAddress)
2368 							: deUint64(accelerationStructureKHR.getInternal());
2369 	}
2370 
2371 	VkAccelerationStructureInstanceKHR	accelerationStructureInstanceKHR = makeVkAccelerationStructureInstanceKHR
2372 	(
2373 		instanceData.matrix,									//  VkTransformMatrixKHR		transform;
2374 		instanceData.instanceCustomIndex,						//  deUint32					instanceCustomIndex:24;
2375 		instanceData.mask,										//  deUint32					mask:8;
2376 		instanceData.instanceShaderBindingTableRecordOffset,	//  deUint32					instanceShaderBindingTableRecordOffset:24;
2377 		instanceData.flags,										//  VkGeometryInstanceFlagsKHR	flags:8;
2378 		structureReference										//  deUint64					accelerationStructureReference;
2379 	);
2380 
2381 	deMemcpy(bufferLocation, &accelerationStructureInstanceKHR, sizeof(VkAccelerationStructureInstanceKHR));
2382 }
2383 
updateInstanceBuffer(const DeviceInterface & vk,const VkDevice device,const std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> & bottomLevelInstances,const std::vector<InstanceData> & instanceData,const BufferWithMemory * instanceBuffer,VkAccelerationStructureBuildTypeKHR buildType,bool inactiveInstances)2384 void updateInstanceBuffer (const DeviceInterface&												vk,
2385 						   const VkDevice														device,
2386 						   const std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>&	bottomLevelInstances,
2387 						   const std::vector<InstanceData>&										instanceData,
2388 						   const BufferWithMemory*												instanceBuffer,
2389 						   VkAccelerationStructureBuildTypeKHR									buildType,
2390 						   bool																	inactiveInstances)
2391 {
2392 	DE_ASSERT(bottomLevelInstances.size() != 0);
2393 	DE_ASSERT(bottomLevelInstances.size() == instanceData.size());
2394 
2395 	auto&			instancesAlloc		= instanceBuffer->getAllocation();
2396 	auto			bufferStart			= reinterpret_cast<deUint8*>(instancesAlloc.getHostPtr());
2397 	VkDeviceSize	bufferOffset		= 0ull;
2398 
2399 	for (size_t instanceNdx = 0; instanceNdx < bottomLevelInstances.size(); ++instanceNdx)
2400 	{
2401 		const auto& blas = *bottomLevelInstances[instanceNdx];
2402 		updateSingleInstance(vk, device, blas, instanceData[instanceNdx], bufferStart + bufferOffset, buildType, inactiveInstances);
2403 		bufferOffset += sizeof(VkAccelerationStructureInstanceKHR);
2404 	}
2405 
2406 	flushMappedMemoryRange(vk, device, instancesAlloc.getMemory(), instancesAlloc.getOffset(), VK_WHOLE_SIZE);
2407 }
2408 
2409 class TopLevelAccelerationStructureKHR : public TopLevelAccelerationStructure
2410 {
2411 public:
2412 	static deUint32											getRequiredAllocationCount							(void);
2413 
2414 															TopLevelAccelerationStructureKHR					();
2415 															TopLevelAccelerationStructureKHR					(const TopLevelAccelerationStructureKHR&		other) = delete;
2416 	virtual													~TopLevelAccelerationStructureKHR					();
2417 
2418 	void													setBuildType										(const VkAccelerationStructureBuildTypeKHR		buildType) override;
2419 	void													setCreateFlags										(const VkAccelerationStructureCreateFlagsKHR	createFlags) override;
2420 	void													setCreateGeneric									(bool											createGeneric) override;
2421 	void													setBuildFlags										(const VkBuildAccelerationStructureFlagsKHR		buildFlags) override;
2422 	void													setBuildWithoutPrimitives							(bool											buildWithoutPrimitives) override;
2423 	void													setInactiveInstances								(bool											inactiveInstances) override;
2424 	void													setDeferredOperation								(const bool										deferredOperation,
2425 																												 const deUint32									workerThreadCount) override;
2426 	void													setUseArrayOfPointers								(const bool										useArrayOfPointers) override;
2427 	void													setIndirectBuildParameters							(const VkBuffer									indirectBuffer,
2428 																												 const VkDeviceSize								indirectBufferOffset,
2429 																												 const deUint32									indirectBufferStride) override;
2430 	void													setUsePPGeometries									(const bool										usePPGeometries) override;
2431 	void													setTryCachedMemory									(const bool										tryCachedMemory) override;
2432 	VkBuildAccelerationStructureFlagsKHR					getBuildFlags										() const override;
2433 
2434 	void													getCreationSizes									(const DeviceInterface&							vk,
2435 																												 const VkDevice									device,
2436 																												 const VkDeviceSize								structureSize,
2437 																												 CreationSizes&									sizes) override;
2438 	void													create												(const DeviceInterface&							vk,
2439 																												 const VkDevice									device,
2440 																												 Allocator&										allocator,
2441 																												 VkDeviceSize									structureSize,
2442 																												 VkDeviceAddress								deviceAddress			= 0u,
2443 																												 const void*									pNext					= DE_NULL,
2444 																												 const MemoryRequirement&						addMemoryRequirement	= MemoryRequirement::Any) override;
2445 	void													build												(const DeviceInterface&							vk,
2446 																												 const VkDevice									device,
2447 																												 const VkCommandBuffer							cmdBuffer) override;
2448 	void													copyFrom											(const DeviceInterface&							vk,
2449 																												 const VkDevice									device,
2450 																												 const VkCommandBuffer							cmdBuffer,
2451 																												 TopLevelAccelerationStructure*					accelerationStructure,
2452 																												 bool											compactCopy) override;
2453 	void													serialize											(const DeviceInterface&							vk,
2454 																												 const VkDevice									device,
2455 																												 const VkCommandBuffer							cmdBuffer,
2456 																												 SerialStorage*									storage) override;
2457 	void													deserialize											(const DeviceInterface&							vk,
2458 																												 const VkDevice									device,
2459 																												 const VkCommandBuffer							cmdBuffer,
2460 																												 SerialStorage*									storage) override;
2461 
2462 	std::vector<VkDeviceSize>								getSerializingSizes									(const DeviceInterface&							vk,
2463 																												 const VkDevice									device,
2464 																												 const VkQueue									queue,
2465 																												 const deUint32									queueFamilyIndex) override;
2466 
2467 	std::vector<deUint64>									getSerializingAddresses								(const DeviceInterface&							vk,
2468 																												 const VkDevice									device) const override;
2469 
2470 
2471 	const VkAccelerationStructureKHR*						getPtr												(void) const override;
2472 
2473 	void													updateInstanceMatrix								(const DeviceInterface&							vk,
2474 																												 const VkDevice									device,
2475 																												 size_t											instanceIndex,
2476 																												 const VkTransformMatrixKHR&					matrix) override;
2477 
2478 protected:
2479 	VkAccelerationStructureBuildTypeKHR						m_buildType;
2480 	VkAccelerationStructureCreateFlagsKHR					m_createFlags;
2481 	bool													m_createGeneric;
2482 	VkBuildAccelerationStructureFlagsKHR					m_buildFlags;
2483 	bool													m_buildWithoutPrimitives;
2484 	bool													m_inactiveInstances;
2485 	bool													m_deferredOperation;
2486 	deUint32												m_workerThreadCount;
2487 	bool													m_useArrayOfPointers;
2488 	de::MovePtr<BufferWithMemory>							m_accelerationStructureBuffer;
2489 	de::MovePtr<BufferWithMemory>							m_instanceBuffer;
2490 	de::MovePtr<BufferWithMemory>							m_instanceAddressBuffer;
2491 	de::MovePtr<BufferWithMemory>							m_deviceScratchBuffer;
2492 	std::vector<deUint8>									m_hostScratchBuffer;
2493 	Move<VkAccelerationStructureKHR>						m_accelerationStructureKHR;
2494 	VkBuffer												m_indirectBuffer;
2495 	VkDeviceSize											m_indirectBufferOffset;
2496 	deUint32												m_indirectBufferStride;
2497 	bool													m_usePPGeometries;
2498 	bool													m_tryCachedMemory;
2499 
2500 
2501 	void													prepareInstances									(const DeviceInterface&							vk,
2502 																												 const VkDevice									device,
2503 																												 VkAccelerationStructureGeometryKHR&			accelerationStructureGeometryKHR,
2504 																												 std::vector<deUint32>&							maxPrimitiveCounts);
2505 
2506 	void													serializeBottoms									(const DeviceInterface&							vk,
2507 																												 const VkDevice									device,
2508 																												 const VkCommandBuffer							cmdBuffer,
2509 																												 SerialStorage*									storage,
2510 																												 VkDeferredOperationKHR							deferredOperation);
2511 
2512 	void													createAndDeserializeBottoms							(const DeviceInterface&							vk,
2513 																												 const VkDevice									device,
2514 																												 const VkCommandBuffer							cmdBuffer,
2515 																												 Allocator&										allocator,
2516 																												 SerialStorage*									storage) override;
2517 };
2518 
getRequiredAllocationCount(void)2519 deUint32 TopLevelAccelerationStructureKHR::getRequiredAllocationCount (void)
2520 {
2521 	/*
2522 		de::MovePtr<BufferWithMemory>							m_instanceBuffer;
2523 		de::MovePtr<Allocation>									m_accelerationStructureAlloc;
2524 		de::MovePtr<BufferWithMemory>							m_deviceScratchBuffer;
2525 	*/
2526 	return 3u;
2527 }
2528 
// Default state: device-side build, no create/build flags, active instances,
// synchronous (non-deferred) host operations, and cached memory preferred for
// the instance buffer (m_tryCachedMemory). All buffers start unallocated.
TopLevelAccelerationStructureKHR::TopLevelAccelerationStructureKHR ()
	: TopLevelAccelerationStructure	()
	, m_buildType					(VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	, m_createFlags					(0u)
	, m_createGeneric				(false)
	, m_buildFlags					(0u)
	, m_buildWithoutPrimitives		(false)
	, m_inactiveInstances			(false)
	, m_deferredOperation			(false)
	, m_workerThreadCount			(0)
	, m_useArrayOfPointers			(false)
	, m_accelerationStructureBuffer	(DE_NULL)
	, m_instanceBuffer				(DE_NULL)
	, m_instanceAddressBuffer		(DE_NULL)
	, m_deviceScratchBuffer			(DE_NULL)
	, m_accelerationStructureKHR	()
	, m_indirectBuffer				(DE_NULL)
	, m_indirectBufferOffset		(0)
	, m_indirectBufferStride		(0)
	, m_usePPGeometries				(false)
	, m_tryCachedMemory				(true)
{
}
2552 
// All owned resources (buffers, the VkAccelerationStructureKHR handle) are
// released by the de::MovePtr / Move member destructors; nothing to do here.
TopLevelAccelerationStructureKHR::~TopLevelAccelerationStructureKHR ()
{
}
2556 
setBuildType(const VkAccelerationStructureBuildTypeKHR buildType)2557 void TopLevelAccelerationStructureKHR::setBuildType (const VkAccelerationStructureBuildTypeKHR	buildType)
2558 {
2559 	m_buildType = buildType;
2560 }
2561 
setCreateFlags(const VkAccelerationStructureCreateFlagsKHR createFlags)2562 void TopLevelAccelerationStructureKHR::setCreateFlags (const VkAccelerationStructureCreateFlagsKHR	createFlags)
2563 {
2564 	m_createFlags = createFlags;
2565 }
2566 
setCreateGeneric(bool createGeneric)2567 void TopLevelAccelerationStructureKHR::setCreateGeneric (bool createGeneric)
2568 {
2569 	m_createGeneric = createGeneric;
2570 }
2571 
setInactiveInstances(bool inactiveInstances)2572 void TopLevelAccelerationStructureKHR::setInactiveInstances (bool inactiveInstances)
2573 {
2574 	m_inactiveInstances = inactiveInstances;
2575 }
2576 
setBuildFlags(const VkBuildAccelerationStructureFlagsKHR buildFlags)2577 void TopLevelAccelerationStructureKHR::setBuildFlags (const VkBuildAccelerationStructureFlagsKHR	buildFlags)
2578 {
2579 	m_buildFlags = buildFlags;
2580 }
2581 
setBuildWithoutPrimitives(bool buildWithoutPrimitives)2582 void TopLevelAccelerationStructureKHR::setBuildWithoutPrimitives (bool buildWithoutPrimitives)
2583 {
2584 	m_buildWithoutPrimitives = buildWithoutPrimitives;
2585 }
2586 
setDeferredOperation(const bool deferredOperation,const deUint32 workerThreadCount)2587 void TopLevelAccelerationStructureKHR::setDeferredOperation (const bool		deferredOperation,
2588 															 const deUint32	workerThreadCount)
2589 {
2590 	m_deferredOperation = deferredOperation;
2591 	m_workerThreadCount = workerThreadCount;
2592 }
2593 
setUseArrayOfPointers(const bool useArrayOfPointers)2594 void TopLevelAccelerationStructureKHR::setUseArrayOfPointers (const bool	useArrayOfPointers)
2595 {
2596 	m_useArrayOfPointers = useArrayOfPointers;
2597 }
2598 
setUsePPGeometries(const bool usePPGeometries)2599 void TopLevelAccelerationStructureKHR::setUsePPGeometries (const bool usePPGeometries)
2600 {
2601 	m_usePPGeometries = usePPGeometries;
2602 }
2603 
setTryCachedMemory(const bool tryCachedMemory)2604 void TopLevelAccelerationStructureKHR::setTryCachedMemory (const bool tryCachedMemory)
2605 {
2606 	m_tryCachedMemory = tryCachedMemory;
2607 }
2608 
setIndirectBuildParameters(const VkBuffer indirectBuffer,const VkDeviceSize indirectBufferOffset,const deUint32 indirectBufferStride)2609 void TopLevelAccelerationStructureKHR::setIndirectBuildParameters (const VkBuffer		indirectBuffer,
2610 																   const VkDeviceSize	indirectBufferOffset,
2611 																   const deUint32		indirectBufferStride)
2612 {
2613 	m_indirectBuffer		= indirectBuffer;
2614 	m_indirectBufferOffset	= indirectBufferOffset;
2615 	m_indirectBufferStride	= indirectBufferStride;
2616 }
2617 
getBuildFlags() const2618 VkBuildAccelerationStructureFlagsKHR TopLevelAccelerationStructureKHR::getBuildFlags () const
2619 {
2620 	return m_buildFlags;
2621 }
2622 
sum() const2623 VkDeviceSize TopLevelAccelerationStructure::CreationSizes::sum () const
2624 {
2625 	return structure + updateScratch + buildScratch + instancePointers + instancesBuffer;
2626 }
2627 
// Computes, without allocating anything, the sizes of every buffer that
// create() would allocate for this TLAS: the structure itself, update/build
// scratch, the optional instance-pointer buffer and the instance buffer.
// With structureSize == 0 the sizes are queried from the implementation via
// vkGetAccelerationStructureBuildSizesKHR; otherwise the caller-provided size
// is used verbatim (copy/compact/deserialize path, no geometry needed).
void TopLevelAccelerationStructureKHR::getCreationSizes (const DeviceInterface&	vk,
														 const VkDevice			device,
														 const VkDeviceSize		structureSize,
														 CreationSizes&			sizes)
{
	// AS may be built from geometries using vkCmdBuildAccelerationStructureKHR / vkBuildAccelerationStructureKHR
	// or may be copied/compacted/deserialized from other AS ( in this case AS does not need geometries, but it needs to know its size before creation ).
	DE_ASSERT(!m_bottomLevelInstances.empty() != !(structureSize == 0)); // logical xor

	if (structureSize == 0)
	{
		VkAccelerationStructureGeometryKHR		accelerationStructureGeometryKHR;
		const auto								accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
		std::vector<deUint32>					maxPrimitiveCounts;
		prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);

		// Only one of pGeometries/ppGeometries may be used; which one depends on m_usePPGeometries.
		VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR		=
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,						//  VkStructureType										sType;
			DE_NULL,																				//  const void*											pNext;
			VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,											//  VkAccelerationStructureTypeKHR						type;
			m_buildFlags,																			//  VkBuildAccelerationStructureFlagsKHR				flags;
			VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,											//  VkBuildAccelerationStructureModeKHR					mode;
			DE_NULL,																				//  VkAccelerationStructureKHR							srcAccelerationStructure;
			DE_NULL,																				//  VkAccelerationStructureKHR							dstAccelerationStructure;
			1u,																						//  deUint32											geometryCount;
			(m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR),						//  const VkAccelerationStructureGeometryKHR*			pGeometries;
			(m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr),					//  const VkAccelerationStructureGeometryKHR* const*	ppGeometries;
			makeDeviceOrHostAddressKHR(DE_NULL)														//  VkDeviceOrHostAddressKHR							scratchData;
		};

		VkAccelerationStructureBuildSizesInfoKHR	sizeInfo =
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,	//  VkStructureType	sType;
			DE_NULL,														//  const void*		pNext;
			0,																//  VkDeviceSize	accelerationStructureSize;
			0,																//  VkDeviceSize	updateScratchSize;
			0																//  VkDeviceSize	buildScratchSize;
		};

		vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);

		sizes.structure		= sizeInfo.accelerationStructureSize;
		sizes.updateScratch	= sizeInfo.updateScratchSize;
		sizes.buildScratch	= sizeInfo.buildScratchSize;
	}
	else
	{
		// Pre-sized structure (e.g. for compacted copy): no scratch needed.
		sizes.structure		= structureSize;
		sizes.updateScratch	= 0u;
		sizes.buildScratch	= 0u;
	}

	// The pointer-array buffer holds one device or host address per instance,
	// depending on the build type.
	sizes.instancePointers	= 0u;
	if (m_useArrayOfPointers)
	{
		const size_t	pointerSize = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) ? sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress) : sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
		sizes.instancePointers		= static_cast<VkDeviceSize>(m_bottomLevelInstances.size() * pointerSize);
	}

	sizes.instancesBuffer = m_bottomLevelInstances.empty() ? 0u : m_bottomLevelInstances.size() * sizeof(VkAccelerationStructureInstanceKHR);
}
2690 
// Allocates all resources needed before build(): queries (or accepts) the
// structure size, allocates the backing buffer, creates the
// VkAccelerationStructureKHR handle, and allocates scratch, instance-pointer
// and instance buffers as required by the configured options.
void TopLevelAccelerationStructureKHR::create (const DeviceInterface&				vk,
											   const VkDevice						device,
											   Allocator&							allocator,
											   VkDeviceSize							structureSize,
											   VkDeviceAddress						deviceAddress,
											   const void*							pNext,
											   const MemoryRequirement&				addMemoryRequirement)
{
	// AS may be built from geometries using vkCmdBuildAccelerationStructureKHR / vkBuildAccelerationStructureKHR
	// or may be copied/compacted/deserialized from other AS ( in this case AS does not need geometries, but it needs to know its size before creation ).
	DE_ASSERT(!m_bottomLevelInstances.empty() != !(structureSize == 0)); // logical xor

	if (structureSize == 0)
	{
		// Same size query as getCreationSizes(); results are stored in members.
		VkAccelerationStructureGeometryKHR		accelerationStructureGeometryKHR;
		const auto								accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
		std::vector<deUint32>					maxPrimitiveCounts;
		prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);

		VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR		=
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,						//  VkStructureType										sType;
			DE_NULL,																				//  const void*											pNext;
			VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,											//  VkAccelerationStructureTypeKHR						type;
			m_buildFlags,																			//  VkBuildAccelerationStructureFlagsKHR				flags;
			VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,											//  VkBuildAccelerationStructureModeKHR					mode;
			DE_NULL,																				//  VkAccelerationStructureKHR							srcAccelerationStructure;
			DE_NULL,																				//  VkAccelerationStructureKHR							dstAccelerationStructure;
			1u,																						//  deUint32											geometryCount;
			(m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR),						//  const VkAccelerationStructureGeometryKHR*			pGeometries;
			(m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr),					//  const VkAccelerationStructureGeometryKHR* const*	ppGeometries;
			makeDeviceOrHostAddressKHR(DE_NULL)														//  VkDeviceOrHostAddressKHR							scratchData;
		};

		VkAccelerationStructureBuildSizesInfoKHR	sizeInfo =
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,	//  VkStructureType	sType;
			DE_NULL,														//  const void*		pNext;
			0,																//  VkDeviceSize	accelerationStructureSize;
			0,																//  VkDeviceSize	updateScratchSize;
			0																//  VkDeviceSize	buildScratchSize;
		};

		vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);

		m_structureSize		= sizeInfo.accelerationStructureSize;
		m_updateScratchSize	= sizeInfo.updateScratchSize;
		m_buildScratchSize	= sizeInfo.buildScratchSize;
	}
	else
	{
		// Pre-sized structure (copy/compact/deserialize target): no scratch.
		m_structureSize		= structureSize;
		m_updateScratchSize	= 0u;
		m_buildScratchSize	= 0u;
	}

	{
		const VkBufferCreateInfo	bufferCreateInfo	= makeBufferCreateInfo(m_structureSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
		const MemoryRequirement		memoryRequirement	= addMemoryRequirement | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress;

		// NOTE(review): cached memory is always attempted for the backing
		// buffer regardless of m_tryCachedMemory (which only gates the
		// instance buffer below) — confirm this asymmetry is intended.
		try
		{
			m_accelerationStructureBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | memoryRequirement));
		}
		catch (const tcu::NotSupportedError&)
		{
			// retry without Cached flag
			m_accelerationStructureBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, memoryRequirement));
		}
	}

	{
		const VkAccelerationStructureTypeKHR		structureType						= (m_createGeneric
																						   ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
																						   : VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR);
		const VkAccelerationStructureCreateInfoKHR	accelerationStructureCreateInfoKHR	=
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,	//  VkStructureType											sType;
			pNext,														//  const void*												pNext;
			m_createFlags,												//  VkAccelerationStructureCreateFlagsKHR					createFlags;
			m_accelerationStructureBuffer->get(),						//  VkBuffer												buffer;
			0u,															//  VkDeviceSize											offset;
			m_structureSize,											//  VkDeviceSize											size;
			structureType,												//  VkAccelerationStructureTypeKHR							type;
			deviceAddress												//  VkDeviceAddress											deviceAddress;
		};

		m_accelerationStructureKHR	= createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);
	}

	// Scratch lives in a device buffer for device builds, or in host memory
	// for host builds.
	if (m_buildScratchSize > 0u)
	{
		if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
		{
			const VkBufferCreateInfo		bufferCreateInfo	= makeBufferCreateInfo(m_buildScratchSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
			m_deviceScratchBuffer								= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
		}
		else
		{
			m_hostScratchBuffer.resize(static_cast<size_t>(m_buildScratchSize));
		}
	}

	// Optional buffer of per-instance addresses (arrayOfPointers layout).
	if (m_useArrayOfPointers)
	{
		const size_t				pointerSize = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) ? sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress) : sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
		const VkBufferCreateInfo	bufferCreateInfo = makeBufferCreateInfo(static_cast<VkDeviceSize>(m_bottomLevelInstances.size() * pointerSize), VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
		m_instanceAddressBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
	}

	if(!m_bottomLevelInstances.empty())
		m_instanceBuffer = de::MovePtr<BufferWithMemory>(createInstanceBuffer(vk, device, allocator, m_bottomLevelInstances, m_instanceData, m_tryCachedMemory));
}
2804 
// Rewrites the transform matrix of a single instance directly in the mapped
// instance buffer and flushes the range. Only valid for device builds (see
// assert); the structure must be rebuilt/updated afterwards for the change to
// take effect in traversal.
void TopLevelAccelerationStructureKHR::updateInstanceMatrix (const DeviceInterface& vk, const VkDevice device, size_t instanceIndex, const VkTransformMatrixKHR& matrix)
{
	DE_ASSERT(m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR);
	DE_ASSERT(instanceIndex < m_bottomLevelInstances.size());
	DE_ASSERT(instanceIndex < m_instanceData.size());

	const auto&		blas			= *m_bottomLevelInstances[instanceIndex];
	auto&			instanceData	= m_instanceData[instanceIndex];
	auto&			instancesAlloc	= m_instanceBuffer->getAllocation();
	auto			bufferStart		= reinterpret_cast<deUint8*>(instancesAlloc.getHostPtr());
	// Instances are tightly packed; index straight into the mapped buffer.
	VkDeviceSize	bufferOffset	= sizeof(VkAccelerationStructureInstanceKHR) * instanceIndex;

	instanceData.matrix = matrix;
	updateSingleInstance(vk, device, blas, instanceData, bufferStart + bufferOffset, m_buildType, m_inactiveInstances);
	flushMappedMemoryRange(vk, device, instancesAlloc.getMemory(), instancesAlloc.getOffset(), VK_WHOLE_SIZE);
}
2821 
// Builds the TLAS from the previously registered bottom-level instances.
// Depending on configuration the build is recorded into the command buffer
// (device build, direct or indirect) or executed on the host (synchronously
// or through a deferred operation). Requires create() to have been called.
void TopLevelAccelerationStructureKHR::build (const DeviceInterface&	vk,
											  const VkDevice			device,
											  const VkCommandBuffer		cmdBuffer)
{
	DE_ASSERT(!m_bottomLevelInstances.empty());
	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
	DE_ASSERT(m_buildScratchSize != 0);

	// Refresh instance data (addresses, flags, inactive state) in the buffer.
	updateInstanceBuffer(vk, device, m_bottomLevelInstances, m_instanceData, m_instanceBuffer.get(), m_buildType, m_inactiveInstances);

	VkAccelerationStructureGeometryKHR		accelerationStructureGeometryKHR;
	const auto								accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
	std::vector<deUint32>					maxPrimitiveCounts;
	prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);

	VkDeviceOrHostAddressKHR				scratchData										= (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
																							? makeDeviceOrHostAddressKHR(vk, device, m_deviceScratchBuffer->get(), 0)
																							: makeDeviceOrHostAddressKHR(m_hostScratchBuffer.data());

	VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR		=
	{
		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,						//  VkStructureType										sType;
		DE_NULL,																				//  const void*											pNext;
		VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,											//  VkAccelerationStructureTypeKHR						type;
		m_buildFlags,																			//  VkBuildAccelerationStructureFlagsKHR				flags;
		VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,											//  VkBuildAccelerationStructureModeKHR					mode;
		DE_NULL,																				//  VkAccelerationStructureKHR							srcAccelerationStructure;
		m_accelerationStructureKHR.get(),														//  VkAccelerationStructureKHR							dstAccelerationStructure;
		1u,																						//  deUint32											geometryCount;
		(m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR),						//  const VkAccelerationStructureGeometryKHR*			pGeometries;
		(m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr),					//  const VkAccelerationStructureGeometryKHR* const*	ppGeometries;
		scratchData																				//  VkDeviceOrHostAddressKHR							scratchData;
	};

	const deUint32 primitiveCount = (m_buildWithoutPrimitives ? 0u : static_cast<deUint32>(m_bottomLevelInstances.size()));

	VkAccelerationStructureBuildRangeInfoKHR accelerationStructureBuildRangeInfoKHR =
	{
		primitiveCount,	//  deUint32	primitiveCount;
		0,				//  deUint32	primitiveOffset;
		0,				//  deUint32	firstVertex;
		0				//  deUint32	transformOffset;
	};
	VkAccelerationStructureBuildRangeInfoKHR* accelerationStructureBuildRangeInfoKHRPtr	= &accelerationStructureBuildRangeInfoKHR;

	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		if (m_indirectBuffer == DE_NULL)
			vk.cmdBuildAccelerationStructuresKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
		else
		{
			// Indirect build: range info is fetched from m_indirectBuffer by the device.
			VkDeviceAddress	indirectDeviceAddress = getBufferDeviceAddress(vk, device, m_indirectBuffer, m_indirectBufferOffset);
			deUint32*		pMaxPrimitiveCounts = maxPrimitiveCounts.data();
			vk.cmdBuildAccelerationStructuresIndirectKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, &indirectDeviceAddress, &m_indirectBufferStride, &pMaxPrimitiveCounts);
		}
	}
	else if (!m_deferredOperation)
	{
		VK_CHECK(vk.buildAccelerationStructuresKHR(device, DE_NULL, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr));
	}
	else
	{
		const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
		const auto deferredOperation	= deferredOperationPtr.get();

		VkResult result = vk.buildAccelerationStructuresKHR(device, deferredOperation, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);

		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);

		finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);

		// NOTE(review): dead store — the local struct goes out of scope right
		// after this; presumably leftover from an earlier refactor. Confirm.
		accelerationStructureBuildGeometryInfoKHR.pNext = DE_NULL;
	}

	// For device builds, make the freshly built AS visible to subsequent
	// commands (traversal, copies) before the command buffer continues.
	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		const VkAccessFlags		accessMasks	= VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
		const VkMemoryBarrier	memBarrier	= makeMemoryBarrier(accessMasks, accessMasks);

		cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
	}
}
2904 
// Copies (clone or compacting copy) another TLAS into this one. Device builds
// record the copy into the command buffer; host builds execute it immediately,
// optionally through a deferred operation. Both structures must already exist.
void TopLevelAccelerationStructureKHR::copyFrom (const DeviceInterface&				vk,
												 const VkDevice						device,
												 const VkCommandBuffer				cmdBuffer,
												 TopLevelAccelerationStructure*		accelerationStructure,
												 bool								compactCopy)
{
	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
	DE_ASSERT(accelerationStructure != DE_NULL);

	VkCopyAccelerationStructureInfoKHR copyAccelerationStructureInfo =
	{
		VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR,															// VkStructureType						sType;
		DE_NULL,																										// const void*							pNext;
		*(accelerationStructure->getPtr()),																				// VkAccelerationStructureKHR			src;
		*(getPtr()),																									// VkAccelerationStructureKHR			dst;
		compactCopy ? VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR : VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR	// VkCopyAccelerationStructureModeKHR	mode;
	};

	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		vk.cmdCopyAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
	}
	else if (!m_deferredOperation)
	{
		VK_CHECK(vk.copyAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
	}
	else
	{
		const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
		const auto deferredOperation	= deferredOperationPtr.get();

		VkResult result = vk.copyAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);

		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);

		finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
	}

	// Make the copied AS visible to subsequent device commands.
	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		const VkAccessFlags		accessMasks	= VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
		const VkMemoryBarrier	memBarrier	= makeMemoryBarrier(accessMasks, accessMasks);

		cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
	}

}
2952 
// Serializes this TLAS into the provided SerialStorage (device or host
// address, per build type). For "deep" storage formats the referenced
// bottom-level structures are serialized as well via serializeBottoms().
void TopLevelAccelerationStructureKHR::serialize (const DeviceInterface&	vk,
												  const VkDevice			device,
												  const VkCommandBuffer		cmdBuffer,
												  SerialStorage*			storage)
{
	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
	DE_ASSERT(storage != DE_NULL);

	const VkCopyAccelerationStructureToMemoryInfoKHR	copyAccelerationStructureInfo	=
	{
		VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_INFO_KHR,	// VkStructureType						sType;
		DE_NULL,															// const void*							pNext;
		*(getPtr()),														// VkAccelerationStructureKHR			src;
		storage->getAddress(vk, device, m_buildType),						// VkDeviceOrHostAddressKHR				dst;
		VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR					// VkCopyAccelerationStructureModeKHR	mode;
	};

	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		vk.cmdCopyAccelerationStructureToMemoryKHR(cmdBuffer, &copyAccelerationStructureInfo);
		if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, DE_NULL);
	}
	else if (!m_deferredOperation)
	{
		VK_CHECK(vk.copyAccelerationStructureToMemoryKHR(device, DE_NULL, &copyAccelerationStructureInfo));
		if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, DE_NULL);
	}
	else
	{
		const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
		const auto deferredOperation	= deferredOperationPtr.get();

		const VkResult result = vk.copyAccelerationStructureToMemoryKHR(device, deferredOperation, &copyAccelerationStructureInfo);

		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
		// Bottoms are serialized with the same deferred operation before it is joined.
		if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, deferredOperation);

		finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
	}
}
2993 
// Restores this (already-created) TLAS from serialized data in the given
// SerialStorage. Device builds record the copy into the command buffer and
// append a barrier; host builds execute immediately, optionally deferred.
void TopLevelAccelerationStructureKHR::deserialize (const DeviceInterface&	vk,
													const VkDevice			device,
													const VkCommandBuffer	cmdBuffer,
													SerialStorage*			storage)
{
	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
	DE_ASSERT(storage != DE_NULL);

	const VkCopyMemoryToAccelerationStructureInfoKHR	copyAccelerationStructureInfo	=
	{
		VK_STRUCTURE_TYPE_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_INFO_KHR,	// VkStructureType							sType;
		DE_NULL,															// const void*								pNext;
		storage->getAddressConst(vk, device, m_buildType),					// VkDeviceOrHostAddressConstKHR			src;
		*(getPtr()),														// VkAccelerationStructureKHR				dst;
		VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR					// VkCopyAccelerationStructureModeKHR		mode;
	};

	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		vk.cmdCopyMemoryToAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
	}
	else if (!m_deferredOperation)
	{
		VK_CHECK(vk.copyMemoryToAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
	}
	else
	{
		const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
		const auto deferredOperation	= deferredOperationPtr.get();

		const VkResult result = vk.copyMemoryToAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);

		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);

		finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
	}

	// Make the deserialized AS visible to subsequent device commands.
	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		const VkAccessFlags		accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
		const VkMemoryBarrier	memBarrier = makeMemoryBarrier(accessMasks, accessMasks);

		cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
	}
}
3039 
serializeBottoms(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,SerialStorage * storage,VkDeferredOperationKHR deferredOperation)3040 void TopLevelAccelerationStructureKHR::serializeBottoms (const DeviceInterface&	vk,
3041 														 const VkDevice			device,
3042 														 const VkCommandBuffer	cmdBuffer,
3043 														 SerialStorage*			storage,
3044 														 VkDeferredOperationKHR	deferredOperation)
3045 {
3046 	DE_UNREF(deferredOperation);
3047 	DE_ASSERT(storage->hasDeepFormat());
3048 
3049 	const std::vector<deUint64>&	addresses		= storage->getSerialInfo().addresses();
3050 	const std::size_t				cbottoms		= m_bottomLevelInstances.size();
3051 
3052 	deUint32						storageIndex	= 0;
3053 	std::vector<deUint64>			matches;
3054 
3055 	for (std::size_t i = 0; i < cbottoms; ++i)
3056 	{
3057 		const deUint64& lookAddr	= addresses[i+1];
3058 		auto			end			= matches.end();
3059 		auto			match		= std::find_if(matches.begin(), end, [&](const deUint64& item){ return item == lookAddr; });
3060 		if (match == end)
3061 		{
3062 			matches.emplace_back(lookAddr);
3063 			m_bottomLevelInstances[i].get()->serialize(vk, device, cmdBuffer, storage->getBottomStorage(storageIndex).get());
3064 			storageIndex += 1;
3065 		}
3066 	}
3067 }
3068 
createAndDeserializeBottoms(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,Allocator & allocator,SerialStorage * storage)3069 void TopLevelAccelerationStructureKHR::createAndDeserializeBottoms (const DeviceInterface&	vk,
3070 																	const VkDevice			device,
3071 																	const VkCommandBuffer	cmdBuffer,
3072 																	Allocator&				allocator,
3073 																	SerialStorage*			storage)
3074 {
3075 	DE_ASSERT(storage->hasDeepFormat());
3076 	DE_ASSERT(m_bottomLevelInstances.size() == 0);
3077 
3078 	const std::vector<deUint64>&					addresses		= storage->getSerialInfo().addresses();
3079 	const std::size_t								cbottoms		= addresses.size() - 1;
3080 	deUint32										storageIndex	= 0;
3081 	std::vector<std::pair<deUint64, std::size_t>>	matches;
3082 
3083 	for (std::size_t i = 0; i < cbottoms; ++i)
3084 	{
3085 		const deUint64& lookAddr	= addresses[i+1];
3086 		auto			end			= matches.end();
3087 		auto			match		= std::find_if(matches.begin(), end, [&](const std::pair<deUint64, std::size_t>& item){ return item.first == lookAddr; });
3088 		if (match != end)
3089 		{
3090 			m_bottomLevelInstances .emplace_back(m_bottomLevelInstances[match->second]);
3091 		}
3092 		else
3093 		{
3094 			de::MovePtr<BottomLevelAccelerationStructure> blas = makeBottomLevelAccelerationStructure();
3095 			blas->createAndDeserializeFrom(vk, device, cmdBuffer, allocator, storage->getBottomStorage(storageIndex).get());
3096 			m_bottomLevelInstances.emplace_back(de::SharedPtr<BottomLevelAccelerationStructure>(blas.release()));
3097 			matches.emplace_back(lookAddr, i);
3098 			storageIndex += 1;
3099 		}
3100 	}
3101 
3102 	std::vector<deUint64>						newAddresses	= getSerializingAddresses(vk, device);
3103 	DE_ASSERT(addresses.size() == newAddresses.size());
3104 
3105 	SerialStorage::AccelerationStructureHeader* header			= storage->getASHeader();
3106 	DE_ASSERT(cbottoms ==header->handleCount);
3107 
3108 	// finally update bottom-level AS addresses before top-level AS deserialization
3109 	for (std::size_t i = 0; i < cbottoms; ++i)
3110 	{
3111 		header->handleArray[i] = newAddresses[i+1];
3112 	}
3113 }
3114 
getSerializingSizes(const DeviceInterface & vk,const VkDevice device,const VkQueue queue,const deUint32 queueFamilyIndex)3115 std::vector<VkDeviceSize> TopLevelAccelerationStructureKHR::getSerializingSizes (const DeviceInterface&	vk,
3116 																				 const VkDevice			device,
3117 																				 const VkQueue			queue,
3118 																				 const deUint32			queueFamilyIndex)
3119 {
3120 	const deUint32							queryCount(deUint32(m_bottomLevelInstances.size()) + 1);
3121 	std::vector<VkAccelerationStructureKHR>	handles(queryCount);
3122 	std::vector<VkDeviceSize>				sizes(queryCount);
3123 
3124 	handles[0] = m_accelerationStructureKHR.get();
3125 
3126 	for (deUint32 h = 1; h < queryCount; ++h)
3127 		handles[h] = *m_bottomLevelInstances[h-1].get()->getPtr();
3128 
3129 	if (VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR == m_buildType)
3130 		queryAccelerationStructureSize(vk, device, DE_NULL, handles, m_buildType, DE_NULL, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, sizes);
3131 	else
3132 	{
3133 		const Move<VkCommandPool>	cmdPool		= createCommandPool(vk, device, 0, queueFamilyIndex);
3134 		const Move<VkCommandBuffer>	cmdBuffer	= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3135 		const Move<VkQueryPool>		queryPool	= makeQueryPool(vk, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, queryCount);
3136 
3137 		beginCommandBuffer(vk, *cmdBuffer);
3138 		queryAccelerationStructureSize(vk, device, *cmdBuffer, handles, m_buildType, *queryPool, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, sizes);
3139 		endCommandBuffer(vk, *cmdBuffer);
3140 		submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
3141 
3142 		VK_CHECK(vk.getQueryPoolResults(device, *queryPool, 0u, queryCount, queryCount * sizeof(VkDeviceSize), sizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
3143 	}
3144 
3145 	return sizes;
3146 }
3147 
getSerializingAddresses(const DeviceInterface & vk,const VkDevice device) const3148 std::vector<deUint64> TopLevelAccelerationStructureKHR::getSerializingAddresses (const DeviceInterface& vk, const VkDevice device) const
3149 {
3150 	std::vector<deUint64> result(m_bottomLevelInstances.size() + 1);
3151 
3152 	VkAccelerationStructureDeviceAddressInfoKHR asDeviceAddressInfo =
3153 	{
3154 		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,	// VkStructureType				sType;
3155 		DE_NULL,															// const void*					pNext;
3156 		DE_NULL																// VkAccelerationStructureKHR	accelerationStructure;
3157 	};
3158 
3159 	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3160 	{
3161 		asDeviceAddressInfo.accelerationStructure = m_accelerationStructureKHR.get();
3162 		result[0] = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
3163 	}
3164 	else
3165 	{
3166 		result[0] = deUint64(getPtr()->getInternal());
3167 	}
3168 
3169 	for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
3170 	{
3171 		const BottomLevelAccelerationStructure&		bottomLevelAccelerationStructure	= *m_bottomLevelInstances[instanceNdx];
3172 		const VkAccelerationStructureKHR			accelerationStructureKHR			= *bottomLevelAccelerationStructure.getPtr();
3173 
3174 		if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3175 		{
3176 			asDeviceAddressInfo.accelerationStructure = accelerationStructureKHR;
3177 			result[instanceNdx+1] = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
3178 		}
3179 		else
3180 		{
3181 			result[instanceNdx+1] = deUint64(accelerationStructureKHR.getInternal());
3182 		}
3183 	}
3184 
3185 	return result;
3186 }
3187 
// Returns a pointer to the internally held top-level acceleration structure handle.
const VkAccelerationStructureKHR* TopLevelAccelerationStructureKHR::getPtr (void) const
{
	return &m_accelerationStructureKHR.get();
}
3192 
// Fills in the instances geometry (and its primitive count) used to build this
// top-level AS from m_bottomLevelInstances. Depending on m_buildType the
// instance data is referenced by device address or by host pointer; when
// m_useArrayOfPointers is set, an extra buffer holding per-instance addresses
// is populated and referenced instead of the packed instance array.
void TopLevelAccelerationStructureKHR::prepareInstances (const DeviceInterface&							vk,
														 const VkDevice									device,
														 VkAccelerationStructureGeometryKHR&			accelerationStructureGeometryKHR,
														 std::vector<deUint32>&							maxPrimitiveCounts)
{
	// Single instances-geometry: the primitive count is the number of bottom-level instances.
	maxPrimitiveCounts.resize(1);
	maxPrimitiveCounts[0] = static_cast<deUint32>(m_bottomLevelInstances.size());

	VkDeviceOrHostAddressConstKHR							instancesData;
	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		if(m_instanceBuffer.get() != DE_NULL)
		{
			if (m_useArrayOfPointers)
			{
				// Write the device address of each instance record (inside
				// m_instanceBuffer) into m_instanceAddressBuffer, then point the
				// build at that array of addresses.
				deUint8*						bufferStart			= static_cast<deUint8*>(m_instanceAddressBuffer->getAllocation().getHostPtr());
				VkDeviceSize					bufferOffset		= 0;
				VkDeviceOrHostAddressConstKHR	firstInstance		= makeDeviceOrHostAddressConstKHR(vk, device, m_instanceBuffer->get(), 0);
				for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
				{
					VkDeviceOrHostAddressConstKHR	currentInstance;
					currentInstance.deviceAddress	= firstInstance.deviceAddress + instanceNdx * sizeof(VkAccelerationStructureInstanceKHR);

					// Only the address member of the union is stored in the array.
					deMemcpy(&bufferStart[bufferOffset], &currentInstance, sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress));
					bufferOffset += sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress);
				}
				// Make the host-side writes visible to the device before the build.
				flushMappedMemoryRange(vk, device, m_instanceAddressBuffer->getAllocation().getMemory(), m_instanceAddressBuffer->getAllocation().getOffset(), VK_WHOLE_SIZE);

				instancesData = makeDeviceOrHostAddressConstKHR(vk, device, m_instanceAddressBuffer->get(), 0);
			}
			else
				// Packed instance records: pass the instance buffer address directly.
				instancesData = makeDeviceOrHostAddressConstKHR(vk, device, m_instanceBuffer->get(), 0);
		}
		else
			instancesData = makeDeviceOrHostAddressConstKHR(DE_NULL);
	}
	else
	{
		// Host build: same two layouts as above, but expressed with host pointers.
		if (m_instanceBuffer.get() != DE_NULL)
		{
			if (m_useArrayOfPointers)
			{
				deUint8*						bufferStart			= static_cast<deUint8*>(m_instanceAddressBuffer->getAllocation().getHostPtr());
				VkDeviceSize					bufferOffset		= 0;
				for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
				{
					VkDeviceOrHostAddressConstKHR	currentInstance;
					currentInstance.hostAddress	= (deUint8*)m_instanceBuffer->getAllocation().getHostPtr() + instanceNdx * sizeof(VkAccelerationStructureInstanceKHR);

					deMemcpy(&bufferStart[bufferOffset], &currentInstance, sizeof(VkDeviceOrHostAddressConstKHR::hostAddress));
					bufferOffset += sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
				}
				instancesData = makeDeviceOrHostAddressConstKHR(m_instanceAddressBuffer->getAllocation().getHostPtr());
			}
			else
				instancesData = makeDeviceOrHostAddressConstKHR(m_instanceBuffer->getAllocation().getHostPtr());
		}
		else
			instancesData = makeDeviceOrHostAddressConstKHR(DE_NULL);
	}

	VkAccelerationStructureGeometryInstancesDataKHR accelerationStructureGeometryInstancesDataKHR	=
	{
		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR,	//  VkStructureType					sType;
		DE_NULL,																//  const void*						pNext;
		(VkBool32)( m_useArrayOfPointers ? DE_TRUE : DE_FALSE ),				//  VkBool32						arrayOfPointers;
		instancesData															//  VkDeviceOrHostAddressConstKHR	data;
	};

	accelerationStructureGeometryKHR					=
	{
		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,										//  VkStructureType							sType;
		DE_NULL,																					//  const void*								pNext;
		VK_GEOMETRY_TYPE_INSTANCES_KHR,																//  VkGeometryTypeKHR						geometryType;
		makeVkAccelerationStructureInstancesDataKHR(accelerationStructureGeometryInstancesDataKHR),	//  VkAccelerationStructureGeometryDataKHR	geometry;
		(VkGeometryFlagsKHR)0u																		//  VkGeometryFlagsKHR						flags;
	};
}
3271 
// Forwards to the KHR implementation's allocation-count estimate.
deUint32 TopLevelAccelerationStructure::getRequiredAllocationCount (void)
{
	return TopLevelAccelerationStructureKHR::getRequiredAllocationCount();
}
3276 
// Factory: creates a KHR-backed top-level acceleration structure wrapper.
de::MovePtr<TopLevelAccelerationStructure> makeTopLevelAccelerationStructure ()
{
	return de::MovePtr<TopLevelAccelerationStructure>(new TopLevelAccelerationStructureKHR);
}
3281 
queryAccelerationStructureSizeKHR(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,const std::vector<VkAccelerationStructureKHR> & accelerationStructureHandles,VkAccelerationStructureBuildTypeKHR buildType,const VkQueryPool queryPool,VkQueryType queryType,deUint32 firstQuery,std::vector<VkDeviceSize> & results)3282 bool queryAccelerationStructureSizeKHR (const DeviceInterface&							vk,
3283 										const VkDevice									device,
3284 										const VkCommandBuffer							cmdBuffer,
3285 										const std::vector<VkAccelerationStructureKHR>&	accelerationStructureHandles,
3286 										VkAccelerationStructureBuildTypeKHR				buildType,
3287 										const VkQueryPool								queryPool,
3288 										VkQueryType										queryType,
3289 										deUint32										firstQuery,
3290 										std::vector<VkDeviceSize>&						results)
3291 {
3292 	DE_ASSERT(queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR || queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR);
3293 
3294 	if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3295 	{
3296 		// queryPool must be large enough to contain at least (firstQuery + accelerationStructureHandles.size()) queries
3297 		vk.cmdResetQueryPool(cmdBuffer, queryPool, firstQuery, deUint32(accelerationStructureHandles.size()));
3298 		vk.cmdWriteAccelerationStructuresPropertiesKHR(cmdBuffer, deUint32(accelerationStructureHandles.size()), accelerationStructureHandles.data(), queryType, queryPool, firstQuery);
3299 		// results cannot be retrieved to CPU at the moment - you need to do it using getQueryPoolResults after cmdBuffer is executed. Meanwhile function returns a vector of 0s.
3300 		results.resize(accelerationStructureHandles.size(), 0u);
3301 		return false;
3302 	}
3303 	// buildType != VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR
3304 	results.resize(accelerationStructureHandles.size(), 0u);
3305 	vk.writeAccelerationStructuresPropertiesKHR(device, deUint32(accelerationStructureHandles.size()), accelerationStructureHandles.data(), queryType,
3306 												sizeof(VkDeviceSize) * accelerationStructureHandles.size(), results.data(), sizeof(VkDeviceSize));
3307 	// results will contain proper values
3308 	return true;
3309 }
3310 
// Thin dispatch wrapper; currently always routes to the KHR implementation.
bool queryAccelerationStructureSize (const DeviceInterface&							vk,
									 const VkDevice									device,
									 const VkCommandBuffer							cmdBuffer,
									 const std::vector<VkAccelerationStructureKHR>&	accelerationStructureHandles,
									 VkAccelerationStructureBuildTypeKHR			buildType,
									 const VkQueryPool								queryPool,
									 VkQueryType									queryType,
									 deUint32										firstQuery,
									 std::vector<VkDeviceSize>&						results)
{
	return queryAccelerationStructureSizeKHR(vk, device, cmdBuffer, accelerationStructureHandles, buildType, queryPool, queryType, firstQuery, results);
}
3323 
// Default-constructs an empty pipeline builder: no shaders, no libraries,
// recursion depth 1, no deferred (threaded) compilation.
RayTracingPipeline::RayTracingPipeline ()
	: m_shadersModules			()
	, m_pipelineLibraries		()
	, m_shaderCreateInfos		()
	, m_shadersGroupCreateInfos	()
	, m_pipelineCreateFlags		(0U)
	, m_maxRecursionDepth		(1U)
	, m_maxPayloadSize			(0U)
	, m_maxAttributeSize		(0U)
	, m_deferredOperation		(false)
	, m_workerThreadCount		(0)
{
}
3337 
// Out-of-line destructor; members (Move<> handles, shared pointers) clean up via RAII.
RayTracingPipeline::~RayTracingPipeline ()
{
}
3341 
// Assigns STAGE into the group's SHADER slot only if the slot is still unused;
// throws InternalError on an attempt to assign the same slot twice.
#define CHECKED_ASSIGN_SHADER(SHADER, STAGE)						\
	if (SHADER == VK_SHADER_UNUSED_KHR)								\
		SHADER = STAGE;												\
	else															\
		TCU_THROW(InternalError, "Attempt to reassign shader")
3347 
// Convenience overload: wraps the Move<> handle in a shared pointer so the
// pipeline keeps the module alive, then forwards to the shared-pointer overload.
void RayTracingPipeline::addShader (VkShaderStageFlagBits					shaderStage,
									Move<VkShaderModule>					shaderModule,
									deUint32								group,
									const VkSpecializationInfo*				specializationInfo,
									const VkPipelineShaderStageCreateFlags	pipelineShaderStageCreateFlags,
									const void*								pipelineShaderStageCreateInfopNext)
{
	addShader(shaderStage, makeVkSharedPtr(shaderModule), group, specializationInfo, pipelineShaderStageCreateFlags, pipelineShaderStageCreateInfopNext);
}
3357 
// Registers the raw handle with the base overload and retains the shared module
// so it outlives pipeline creation.
void RayTracingPipeline::addShader (VkShaderStageFlagBits					shaderStage,
									de::SharedPtr<Move<VkShaderModule>>		shaderModule,
									deUint32								group,
									const VkSpecializationInfo*				specializationInfoPtr,
									const VkPipelineShaderStageCreateFlags	pipelineShaderStageCreateFlags,
									const void*								pipelineShaderStageCreateInfopNext)
{
	addShader(shaderStage, **shaderModule, group, specializationInfoPtr, pipelineShaderStageCreateFlags, pipelineShaderStageCreateInfopNext);
	m_shadersModules.push_back(shaderModule);
}
3368 
addShader(VkShaderStageFlagBits shaderStage,VkShaderModule shaderModule,deUint32 group,const VkSpecializationInfo * specializationInfoPtr,const VkPipelineShaderStageCreateFlags pipelineShaderStageCreateFlags,const void * pipelineShaderStageCreateInfopNext)3369 void RayTracingPipeline::addShader (VkShaderStageFlagBits					shaderStage,
3370 									VkShaderModule							shaderModule,
3371 									deUint32								group,
3372 									const VkSpecializationInfo*				specializationInfoPtr,
3373 									const VkPipelineShaderStageCreateFlags	pipelineShaderStageCreateFlags,
3374 									const void*								pipelineShaderStageCreateInfopNext)
3375 {
3376 	if (group >= m_shadersGroupCreateInfos.size())
3377 	{
3378 		for (size_t groupNdx = m_shadersGroupCreateInfos.size(); groupNdx <= group; ++groupNdx)
3379 		{
3380 			VkRayTracingShaderGroupCreateInfoKHR	shaderGroupCreateInfo	=
3381 			{
3382 				VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR,	//  VkStructureType					sType;
3383 				DE_NULL,													//  const void*						pNext;
3384 				VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR,				//  VkRayTracingShaderGroupTypeKHR	type;
3385 				VK_SHADER_UNUSED_KHR,										//  deUint32						generalShader;
3386 				VK_SHADER_UNUSED_KHR,										//  deUint32						closestHitShader;
3387 				VK_SHADER_UNUSED_KHR,										//  deUint32						anyHitShader;
3388 				VK_SHADER_UNUSED_KHR,										//  deUint32						intersectionShader;
3389 				DE_NULL,													//  const void*						pShaderGroupCaptureReplayHandle;
3390 			};
3391 
3392 			m_shadersGroupCreateInfos.push_back(shaderGroupCreateInfo);
3393 		}
3394 	}
3395 
3396 	const deUint32							shaderStageNdx			= (deUint32)m_shaderCreateInfos.size();
3397 	VkRayTracingShaderGroupCreateInfoKHR&	shaderGroupCreateInfo	= m_shadersGroupCreateInfos[group];
3398 
3399 	switch (shaderStage)
3400 	{
3401 		case VK_SHADER_STAGE_RAYGEN_BIT_KHR:		CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader,		shaderStageNdx);	break;
3402 		case VK_SHADER_STAGE_MISS_BIT_KHR:			CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader,		shaderStageNdx);	break;
3403 		case VK_SHADER_STAGE_CALLABLE_BIT_KHR:		CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader,		shaderStageNdx);	break;
3404 		case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:		CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.anyHitShader,		shaderStageNdx);	break;
3405 		case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:	CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.closestHitShader,	shaderStageNdx);	break;
3406 		case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:	CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.intersectionShader,	shaderStageNdx);	break;
3407 		default:									TCU_THROW(InternalError, "Unacceptable stage");
3408 	}
3409 
3410 	switch (shaderStage)
3411 	{
3412 		case VK_SHADER_STAGE_RAYGEN_BIT_KHR:
3413 		case VK_SHADER_STAGE_MISS_BIT_KHR:
3414 		case VK_SHADER_STAGE_CALLABLE_BIT_KHR:
3415 		{
3416 			DE_ASSERT(shaderGroupCreateInfo.type == VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR);
3417 			shaderGroupCreateInfo.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR;
3418 
3419 			break;
3420 		}
3421 
3422 		case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:
3423 		case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:
3424 		case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:
3425 		{
3426 			DE_ASSERT(shaderGroupCreateInfo.type != VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR);
3427 			shaderGroupCreateInfo.type	= (shaderGroupCreateInfo.intersectionShader == VK_SHADER_UNUSED_KHR)
3428 										? VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR
3429 										: VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR;
3430 
3431 			break;
3432 		}
3433 
3434 		default: TCU_THROW(InternalError, "Unacceptable stage");
3435 	}
3436 
3437 	{
3438 		const VkPipelineShaderStageCreateInfo	shaderCreateInfo	=
3439 		{
3440 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	//  VkStructureType						sType;
3441 			pipelineShaderStageCreateInfopNext,						//  const void*							pNext;
3442 			pipelineShaderStageCreateFlags,							//  VkPipelineShaderStageCreateFlags	flags;
3443 			shaderStage,											//  VkShaderStageFlagBits				stage;
3444 			shaderModule,											//  VkShaderModule						module;
3445 			"main",													//  const char*							pName;
3446 			specializationInfoPtr,									//  const VkSpecializationInfo*			pSpecializationInfo;
3447 		};
3448 
3449 		m_shaderCreateInfos.push_back(shaderCreateInfo);
3450 	}
3451 }
3452 
// Registers a child pipeline library; linked in during createPipelineWithLibraries().
void RayTracingPipeline::addLibrary (de::SharedPtr<de::MovePtr<RayTracingPipeline>> pipelineLibrary)
{
	m_pipelineLibraries.push_back(pipelineLibrary);
}
3457 
// Builds the VkRayTracingPipelineCreateInfoKHR from the accumulated shader
// stages, groups and libraries, then creates the pipeline — optionally through
// a deferred operation (threaded compilation). Throws CompileRequiredError when
// FAIL_ON_PIPELINE_COMPILE_REQUIRED was requested and the driver reports
// VK_PIPELINE_COMPILE_REQUIRED.
Move<VkPipeline> RayTracingPipeline::createPipelineKHR (const DeviceInterface&			vk,
														const VkDevice					device,
														const VkPipelineLayout			pipelineLayout,
														const std::vector<VkPipeline>&	pipelineLibraries,
														const VkPipelineCache			pipelineCache)
{
	// Every group must have been initialized by addShader().
	for (size_t groupNdx = 0; groupNdx < m_shadersGroupCreateInfos.size(); ++groupNdx)
		DE_ASSERT(m_shadersGroupCreateInfos[groupNdx].sType == VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR);

	VkPipelineLibraryCreateInfoKHR				librariesCreateInfo	=
	{
		VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR,	//  VkStructureType	sType;
		DE_NULL,											//  const void*		pNext;
		de::sizeU32(pipelineLibraries),						//  deUint32		libraryCount;
		de::dataOrNull(pipelineLibraries)					//  VkPipeline*		pLibraries;
	};
	const VkRayTracingPipelineInterfaceCreateInfoKHR	pipelineInterfaceCreateInfo		=
	{
		VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_INTERFACE_CREATE_INFO_KHR,	//  VkStructureType	sType;
		DE_NULL,															//  const void*		pNext;
		m_maxPayloadSize,													//  deUint32		maxPayloadSize;
		m_maxAttributeSize													//  deUint32		maxAttributeSize;
	};
	// Chain the interface info only when a non-zero payload/attribute size was
	// configured, and the library info only when libraries were supplied.
	const bool											addPipelineInterfaceCreateInfo	= m_maxPayloadSize != 0 || m_maxAttributeSize != 0;
	const VkRayTracingPipelineInterfaceCreateInfoKHR*	pipelineInterfaceCreateInfoPtr	= addPipelineInterfaceCreateInfo ? &pipelineInterfaceCreateInfo : DE_NULL;
	const VkPipelineLibraryCreateInfoKHR*				librariesCreateInfoPtr			= (pipelineLibraries.empty() ? nullptr : &librariesCreateInfo);

	Move<VkDeferredOperationKHR>						deferredOperation;
	if (m_deferredOperation)
		deferredOperation = createDeferredOperationKHR(vk, device);

	VkPipelineDynamicStateCreateInfo dynamicStateCreateInfo	=
	{
		VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,	// VkStructureType						sType;
		DE_NULL,												// const void*							pNext;
		0,														// VkPipelineDynamicStateCreateFlags	flags;
		static_cast<deUint32>(m_dynamicStates.size() ),			// deUint32								dynamicStateCount;
		m_dynamicStates.data(),									// const VkDynamicState*				pDynamicStates;
	};

	const VkRayTracingPipelineCreateInfoKHR				pipelineCreateInfo				=
	{
		VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR,	//  VkStructureType								sType;
		DE_NULL,												//  const void*									pNext;
		m_pipelineCreateFlags,									//  VkPipelineCreateFlags						flags;
		de::sizeU32(m_shaderCreateInfos),						//  deUint32									stageCount;
		de::dataOrNull(m_shaderCreateInfos),					//  const VkPipelineShaderStageCreateInfo*		pStages;
		de::sizeU32(m_shadersGroupCreateInfos),					//  deUint32									groupCount;
		de::dataOrNull(m_shadersGroupCreateInfos),				//  const VkRayTracingShaderGroupCreateInfoKHR*	pGroups;
		m_maxRecursionDepth,									//  deUint32									maxRecursionDepth;
		librariesCreateInfoPtr,									//  VkPipelineLibraryCreateInfoKHR*				pLibraryInfo;
		pipelineInterfaceCreateInfoPtr,							//  VkRayTracingPipelineInterfaceCreateInfoKHR*	pLibraryInterface;
		&dynamicStateCreateInfo,								//  const VkPipelineDynamicStateCreateInfo*		pDynamicState;
		pipelineLayout,											//  VkPipelineLayout							layout;
		(VkPipeline)DE_NULL,									//  VkPipeline									basePipelineHandle;
		0,														//  deInt32										basePipelineIndex;
	};
	VkPipeline											object							= DE_NULL;
	VkResult											result							= vk.createRayTracingPipelinesKHR(device, deferredOperation.get(), pipelineCache, 1u, &pipelineCreateInfo, DE_NULL, &object);
	const bool											allowCompileRequired			= ((m_pipelineCreateFlags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) != 0);

	if (m_deferredOperation)
	{
		// Deferred compilation: drive the operation to completion (possibly on worker threads).
		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS || (allowCompileRequired && result == VK_PIPELINE_COMPILE_REQUIRED));
		finishDeferredOperation(vk, device, deferredOperation.get(), m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
	}

	if (allowCompileRequired && result == VK_PIPELINE_COMPILE_REQUIRED)
		throw CompileRequiredError("createRayTracingPipelinesKHR returned VK_PIPELINE_COMPILE_REQUIRED");

	// Wrap the raw handle so it is destroyed against the right device and interface.
	Move<VkPipeline> pipeline (check<VkPipeline>(object), Deleter<VkPipeline>(vk, device, DE_NULL));
	return pipeline;
}
3531 
3532 
createPipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const std::vector<de::SharedPtr<Move<VkPipeline>>> & pipelineLibraries)3533 Move<VkPipeline> RayTracingPipeline::createPipeline (const DeviceInterface&									vk,
3534 													 const VkDevice											device,
3535 													 const VkPipelineLayout									pipelineLayout,
3536 													 const std::vector<de::SharedPtr<Move<VkPipeline>>>&	pipelineLibraries)
3537 {
3538 	std::vector<VkPipeline> rawPipelines;
3539 	rawPipelines.reserve(pipelineLibraries.size());
3540 	for (const auto& lib : pipelineLibraries)
3541 		rawPipelines.push_back(lib.get()->get());
3542 
3543 	return createPipelineKHR(vk, device, pipelineLayout, rawPipelines);
3544 }
3545 
// Raw-handle overload: forwards directly to the KHR implementation.
Move<VkPipeline> RayTracingPipeline::createPipeline (const DeviceInterface&			vk,
													 const VkDevice					device,
													 const VkPipelineLayout			pipelineLayout,
													 const std::vector<VkPipeline>&	pipelineLibraries,
													 const VkPipelineCache			pipelineCache)
{
	return createPipelineKHR(vk, device, pipelineLayout, pipelineLibraries, pipelineCache);
}
3554 
createPipelineWithLibraries(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout)3555 std::vector<de::SharedPtr<Move<VkPipeline>>> RayTracingPipeline::createPipelineWithLibraries (const DeviceInterface&			vk,
3556 																								const VkDevice					device,
3557 																								const VkPipelineLayout			pipelineLayout)
3558 {
3559 	for (size_t groupNdx = 0; groupNdx < m_shadersGroupCreateInfos.size(); ++groupNdx)
3560 		DE_ASSERT(m_shadersGroupCreateInfos[groupNdx].sType == VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR);
3561 
3562 	DE_ASSERT(m_shaderCreateInfos.size() > 0);
3563 	DE_ASSERT(m_shadersGroupCreateInfos.size() > 0);
3564 
3565 	std::vector<de::SharedPtr<Move<VkPipeline>>> result, allLibraries, firstLibraries;
3566 	for(auto it=begin(m_pipelineLibraries), eit=end(m_pipelineLibraries); it!=eit; ++it)
3567 	{
3568 		auto childLibraries = (*it)->get()->createPipelineWithLibraries(vk, device, pipelineLayout);
3569 		DE_ASSERT(childLibraries.size() > 0);
3570 		firstLibraries.push_back(childLibraries[0]);
3571 		std::copy(begin(childLibraries), end(childLibraries), std::back_inserter(allLibraries));
3572 	}
3573 	result.push_back(makeVkSharedPtr(createPipeline(vk, device, pipelineLayout, firstLibraries)));
3574 	std::copy(begin(allLibraries), end(allLibraries), std::back_inserter(result));
3575 	return result;
3576 }
3577 
createShaderBindingTable(const DeviceInterface & vk,const VkDevice device,const VkPipeline pipeline,Allocator & allocator,const deUint32 & shaderGroupHandleSize,const deUint32 shaderGroupBaseAlignment,const deUint32 & firstGroup,const deUint32 & groupCount,const VkBufferCreateFlags & additionalBufferCreateFlags,const VkBufferUsageFlags & additionalBufferUsageFlags,const MemoryRequirement & additionalMemoryRequirement,const VkDeviceAddress & opaqueCaptureAddress,const deUint32 shaderBindingTableOffset,const deUint32 shaderRecordSize,const void ** shaderGroupDataPtrPerGroup,const bool autoAlignRecords)3578 de::MovePtr<BufferWithMemory> RayTracingPipeline::createShaderBindingTable (const DeviceInterface&		vk,
3579 																			const VkDevice				device,
3580 																			const VkPipeline			pipeline,
3581 																			Allocator&					allocator,
3582 																			const deUint32&				shaderGroupHandleSize,
3583 																			const deUint32				shaderGroupBaseAlignment,
3584 																			const deUint32&				firstGroup,
3585 																			const deUint32&				groupCount,
3586 																			const VkBufferCreateFlags&	additionalBufferCreateFlags,
3587 																			const VkBufferUsageFlags&	additionalBufferUsageFlags,
3588 																			const MemoryRequirement&	additionalMemoryRequirement,
3589 																			const VkDeviceAddress&		opaqueCaptureAddress,
3590 																			const deUint32				shaderBindingTableOffset,
3591 																			const deUint32				shaderRecordSize,
3592 																			const void**				shaderGroupDataPtrPerGroup,
3593 																			const bool					autoAlignRecords)
3594 {
3595 	DE_ASSERT(shaderGroupBaseAlignment != 0u);
3596 	DE_ASSERT((shaderBindingTableOffset % shaderGroupBaseAlignment) == 0);
3597 	DE_UNREF(shaderGroupBaseAlignment);
3598 
3599 	const auto								totalEntrySize					= (autoAlignRecords ? (deAlign32(shaderGroupHandleSize + shaderRecordSize, shaderGroupHandleSize)) : (shaderGroupHandleSize + shaderRecordSize));
3600 	const deUint32							sbtSize							= shaderBindingTableOffset + groupCount * totalEntrySize;
3601 	const VkBufferUsageFlags				sbtFlags						= VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | additionalBufferUsageFlags;
3602 	VkBufferCreateInfo						sbtCreateInfo					= makeBufferCreateInfo(sbtSize, sbtFlags);
3603 	sbtCreateInfo.flags														|= additionalBufferCreateFlags;
3604 	VkBufferOpaqueCaptureAddressCreateInfo	sbtCaptureAddressInfo			=
3605 	{
3606 		VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO,	// VkStructureType	sType;
3607 		DE_NULL,														// const void*		pNext;
3608 		deUint64(opaqueCaptureAddress)									// deUint64			opaqueCaptureAddress;
3609 	};
3610 
3611 	if (opaqueCaptureAddress != 0u)
3612 	{
3613 		sbtCreateInfo.pNext = &sbtCaptureAddressInfo;
3614 		sbtCreateInfo.flags |= VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
3615 	}
3616 	const MemoryRequirement			sbtMemRequirements						= MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress | additionalMemoryRequirement;
3617 	de::MovePtr<BufferWithMemory>	sbtBuffer								= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, sbtCreateInfo, sbtMemRequirements));
3618 	vk::Allocation&					sbtAlloc								= sbtBuffer->getAllocation();
3619 
3620 	// collect shader group handles
3621 	std::vector<deUint8>			shaderHandles							(groupCount * shaderGroupHandleSize);
3622 	VK_CHECK(getRayTracingShaderGroupHandles(vk, device, pipeline, firstGroup, groupCount, groupCount * shaderGroupHandleSize, shaderHandles.data()));
3623 
3624 	// reserve place for ShaderRecordKHR after each shader handle ( ShaderRecordKHR size might be 0 ). Also take alignment into consideration
3625 	deUint8* shaderBegin = (deUint8*)sbtAlloc.getHostPtr() + shaderBindingTableOffset;
3626 	for (deUint32 idx = 0; idx < groupCount; ++idx)
3627 	{
3628 		deUint8* shaderSrcPos	= shaderHandles.data() + idx * shaderGroupHandleSize;
3629 		deUint8* shaderDstPos	= shaderBegin + idx * totalEntrySize;
3630 		deMemcpy(shaderDstPos, shaderSrcPos, shaderGroupHandleSize);
3631 
3632 		if (shaderGroupDataPtrPerGroup		!= nullptr &&
3633 			shaderGroupDataPtrPerGroup[idx] != nullptr)
3634 		{
3635 			DE_ASSERT(sbtSize >= static_cast<deUint32>(shaderDstPos - shaderBegin) + shaderGroupHandleSize);
3636 
3637 			deMemcpy(	shaderDstPos + shaderGroupHandleSize,
3638 						shaderGroupDataPtrPerGroup[idx],
3639 						shaderRecordSize);
3640 		}
3641 	}
3642 
3643 	flushMappedMemoryRange(vk, device, sbtAlloc.getMemory(), sbtAlloc.getOffset(), VK_WHOLE_SIZE);
3644 
3645 	return sbtBuffer;
3646 }
3647 
// Sets the VkPipelineCreateFlags to be used when the pipeline is created.
void RayTracingPipeline::setCreateFlags (const VkPipelineCreateFlags& pipelineCreateFlags)
{
	m_pipelineCreateFlags = pipelineCreateFlags;
}
3652 
// Sets the maximum ray recursion depth requested for the pipeline.
void RayTracingPipeline::setMaxRecursionDepth (const deUint32& maxRecursionDepth)
{
	m_maxRecursionDepth = maxRecursionDepth;
}
3657 
// Sets the maximum ray payload size (in bytes) for the pipeline.
void RayTracingPipeline::setMaxPayloadSize (const deUint32& maxPayloadSize)
{
	m_maxPayloadSize = maxPayloadSize;
}
3662 
// Sets the maximum hit attribute size (in bytes) for the pipeline.
void RayTracingPipeline::setMaxAttributeSize (const deUint32& maxAttributeSize)
{
	m_maxAttributeSize = maxAttributeSize;
}
3667 
// Enables or disables deferred pipeline creation and records how many worker
// threads should participate in the deferred operation.
void RayTracingPipeline::setDeferredOperation (const bool		deferredOperation,
											   const deUint32	workerThreadCount)
{
	m_deferredOperation = deferredOperation;
	m_workerThreadCount = workerThreadCount;
}
3674 
// Appends a dynamic state to be enabled on the pipeline.
void RayTracingPipeline::addDynamicState(const VkDynamicState& dynamicState)
{
	m_dynamicStates.push_back(dynamicState);
}
3679 
// Implementation of the RayTracingProperties interface backed by the
// VK_KHR_ray_tracing_pipeline and VK_KHR_acceleration_structure property
// structures queried from the physical device (cached in the constructor).
class RayTracingPropertiesKHR : public RayTracingProperties
{
public:
							RayTracingPropertiesKHR						() = delete;
							RayTracingPropertiesKHR						(const InstanceInterface&	vki,
																		 const VkPhysicalDevice		physicalDevice);
	virtual					~RayTracingPropertiesKHR					();

	// Accessors over the cached property structures; no re-querying is done here.
	uint32_t		getShaderGroupHandleSize					(void)	override { return m_rayTracingPipelineProperties.shaderGroupHandleSize;						}
	uint32_t		getShaderGroupHandleAlignment				(void)	override { return m_rayTracingPipelineProperties.shaderGroupHandleAlignment;				}
	uint32_t		getMaxRecursionDepth						(void)	override { return m_rayTracingPipelineProperties.maxRayRecursionDepth;						}
	uint32_t		getMaxShaderGroupStride						(void)	override { return m_rayTracingPipelineProperties.maxShaderGroupStride;						}
	uint32_t		getShaderGroupBaseAlignment					(void)	override { return m_rayTracingPipelineProperties.shaderGroupBaseAlignment;					}
	uint64_t		getMaxGeometryCount							(void)	override { return m_accelerationStructureProperties.maxGeometryCount;						}
	uint64_t		getMaxInstanceCount							(void)	override { return m_accelerationStructureProperties.maxInstanceCount;						}
	uint64_t		getMaxPrimitiveCount						(void)	override { return m_accelerationStructureProperties.maxPrimitiveCount;						}
	uint32_t		getMaxDescriptorSetAccelerationStructures	(void)	override { return m_accelerationStructureProperties.maxDescriptorSetAccelerationStructures;	}
	uint32_t		getMaxRayDispatchInvocationCount			(void)	override { return m_rayTracingPipelineProperties.maxRayDispatchInvocationCount;				}
	uint32_t		getMaxRayHitAttributeSize					(void)	override { return m_rayTracingPipelineProperties.maxRayHitAttributeSize;					}
	uint32_t		getMaxMemoryAllocationCount					(void)	override { return m_maxMemoryAllocationCount;												}

protected:
	VkPhysicalDeviceAccelerationStructurePropertiesKHR	m_accelerationStructureProperties;
	VkPhysicalDeviceRayTracingPipelinePropertiesKHR		m_rayTracingPipelineProperties;
	deUint32											m_maxMemoryAllocationCount;
};
3706 
// Out-of-line destructor; nothing to release beyond the base class.
RayTracingPropertiesKHR::~RayTracingPropertiesKHR ()
{
}
3710 
// Queries and caches the acceleration-structure and ray-tracing-pipeline property
// structures, plus the core maxMemoryAllocationCount limit, for the given device.
RayTracingPropertiesKHR::RayTracingPropertiesKHR (const InstanceInterface&	vki,
												  const VkPhysicalDevice	physicalDevice)
	: RayTracingProperties	(vki, physicalDevice)
{
	// NOTE(review): getPhysicalDeviceExtensionProperties presumably returns a proxy whose
	// conversion operator selects the structure type from the assignment target -- confirm
	// against vkQueryUtil.
	m_accelerationStructureProperties	= getPhysicalDeviceExtensionProperties(vki, physicalDevice);
	m_rayTracingPipelineProperties		= getPhysicalDeviceExtensionProperties(vki, physicalDevice);
	m_maxMemoryAllocationCount			= getPhysicalDeviceProperties(vki, physicalDevice).limits.maxMemoryAllocationCount;
}
3719 
// Factory returning the KHR-backed implementation of RayTracingProperties.
de::MovePtr<RayTracingProperties> makeRayTracingProperties (const InstanceInterface&	vki,
															const VkPhysicalDevice		physicalDevice)
{
	return de::MovePtr<RayTracingProperties>(new RayTracingPropertiesKHR(vki, physicalDevice));
}
3725 
cmdTraceRaysKHR(const DeviceInterface & vk,VkCommandBuffer commandBuffer,const VkStridedDeviceAddressRegionKHR * raygenShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * missShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * hitShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * callableShaderBindingTableRegion,deUint32 width,deUint32 height,deUint32 depth)3726 static inline void cmdTraceRaysKHR (const DeviceInterface&					vk,
3727 									VkCommandBuffer							commandBuffer,
3728 									const VkStridedDeviceAddressRegionKHR*	raygenShaderBindingTableRegion,
3729 									const VkStridedDeviceAddressRegionKHR*	missShaderBindingTableRegion,
3730 									const VkStridedDeviceAddressRegionKHR*	hitShaderBindingTableRegion,
3731 									const VkStridedDeviceAddressRegionKHR*	callableShaderBindingTableRegion,
3732 									deUint32								width,
3733 									deUint32								height,
3734 									deUint32								depth)
3735 {
3736 	return vk.cmdTraceRaysKHR(commandBuffer,
3737 							  raygenShaderBindingTableRegion,
3738 							  missShaderBindingTableRegion,
3739 							  hitShaderBindingTableRegion,
3740 							  callableShaderBindingTableRegion,
3741 							  width,
3742 							  height,
3743 							  depth);
3744 }
3745 
3746 
cmdTraceRays(const DeviceInterface & vk,VkCommandBuffer commandBuffer,const VkStridedDeviceAddressRegionKHR * raygenShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * missShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * hitShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * callableShaderBindingTableRegion,deUint32 width,deUint32 height,deUint32 depth)3747 void cmdTraceRays (const DeviceInterface&					vk,
3748 				   VkCommandBuffer							commandBuffer,
3749 				   const VkStridedDeviceAddressRegionKHR*	raygenShaderBindingTableRegion,
3750 				   const VkStridedDeviceAddressRegionKHR*	missShaderBindingTableRegion,
3751 				   const VkStridedDeviceAddressRegionKHR*	hitShaderBindingTableRegion,
3752 				   const VkStridedDeviceAddressRegionKHR*	callableShaderBindingTableRegion,
3753 				   deUint32									width,
3754 				   deUint32									height,
3755 				   deUint32									depth)
3756 {
3757 	DE_ASSERT(raygenShaderBindingTableRegion	!= DE_NULL);
3758 	DE_ASSERT(missShaderBindingTableRegion		!= DE_NULL);
3759 	DE_ASSERT(hitShaderBindingTableRegion		!= DE_NULL);
3760 	DE_ASSERT(callableShaderBindingTableRegion	!= DE_NULL);
3761 
3762 	return cmdTraceRaysKHR(vk,
3763 						   commandBuffer,
3764 						   raygenShaderBindingTableRegion,
3765 						   missShaderBindingTableRegion,
3766 						   hitShaderBindingTableRegion,
3767 						   callableShaderBindingTableRegion,
3768 						   width,
3769 						   height,
3770 						   depth);
3771 }
3772 
cmdTraceRaysIndirectKHR(const DeviceInterface & vk,VkCommandBuffer commandBuffer,const VkStridedDeviceAddressRegionKHR * raygenShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * missShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * hitShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * callableShaderBindingTableRegion,VkDeviceAddress indirectDeviceAddress)3773 static inline void cmdTraceRaysIndirectKHR (const DeviceInterface&					vk,
3774 											VkCommandBuffer							commandBuffer,
3775 											const VkStridedDeviceAddressRegionKHR*	raygenShaderBindingTableRegion,
3776 											const VkStridedDeviceAddressRegionKHR*	missShaderBindingTableRegion,
3777 											const VkStridedDeviceAddressRegionKHR*	hitShaderBindingTableRegion,
3778 											const VkStridedDeviceAddressRegionKHR*	callableShaderBindingTableRegion,
3779 											VkDeviceAddress							indirectDeviceAddress )
3780 {
3781 	DE_ASSERT(raygenShaderBindingTableRegion	!= DE_NULL);
3782 	DE_ASSERT(missShaderBindingTableRegion		!= DE_NULL);
3783 	DE_ASSERT(hitShaderBindingTableRegion		!= DE_NULL);
3784 	DE_ASSERT(callableShaderBindingTableRegion	!= DE_NULL);
3785 	DE_ASSERT(indirectDeviceAddress				!= 0);
3786 
3787 	return vk.cmdTraceRaysIndirectKHR(commandBuffer,
3788 									  raygenShaderBindingTableRegion,
3789 									  missShaderBindingTableRegion,
3790 									  hitShaderBindingTableRegion,
3791 									  callableShaderBindingTableRegion,
3792 									  indirectDeviceAddress);
3793 }
3794 
cmdTraceRaysIndirect(const DeviceInterface & vk,VkCommandBuffer commandBuffer,const VkStridedDeviceAddressRegionKHR * raygenShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * missShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * hitShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * callableShaderBindingTableRegion,VkDeviceAddress indirectDeviceAddress)3795 void cmdTraceRaysIndirect (const DeviceInterface&					vk,
3796 						   VkCommandBuffer							commandBuffer,
3797 						   const VkStridedDeviceAddressRegionKHR*	raygenShaderBindingTableRegion,
3798 						   const VkStridedDeviceAddressRegionKHR*	missShaderBindingTableRegion,
3799 						   const VkStridedDeviceAddressRegionKHR*	hitShaderBindingTableRegion,
3800 						   const VkStridedDeviceAddressRegionKHR*	callableShaderBindingTableRegion,
3801 						   VkDeviceAddress							indirectDeviceAddress)
3802 {
3803 	return cmdTraceRaysIndirectKHR(vk,
3804 								   commandBuffer,
3805 								   raygenShaderBindingTableRegion,
3806 								   missShaderBindingTableRegion,
3807 								   hitShaderBindingTableRegion,
3808 								   callableShaderBindingTableRegion,
3809 								   indirectDeviceAddress);
3810 }
3811 
cmdTraceRaysIndirect2KHR(const DeviceInterface & vk,VkCommandBuffer commandBuffer,VkDeviceAddress indirectDeviceAddress)3812 static inline void cmdTraceRaysIndirect2KHR (const DeviceInterface&	vk,
3813 											VkCommandBuffer			commandBuffer,
3814 											VkDeviceAddress			indirectDeviceAddress )
3815 {
3816 	DE_ASSERT(indirectDeviceAddress != 0);
3817 
3818 	return vk.cmdTraceRaysIndirect2KHR(commandBuffer, indirectDeviceAddress);
3819 }
3820 
cmdTraceRaysIndirect2(const DeviceInterface & vk,VkCommandBuffer commandBuffer,VkDeviceAddress indirectDeviceAddress)3821 void cmdTraceRaysIndirect2	(const DeviceInterface&	vk,
3822 							 VkCommandBuffer		commandBuffer,
3823 							 VkDeviceAddress		indirectDeviceAddress)
3824 {
3825 	return cmdTraceRaysIndirect2KHR(vk, commandBuffer, indirectDeviceAddress);
3826 }
3827 
3828 #else
3829 
// Compiled only for the Vulkan SC build (CTS_USES_VULKANSC), where everything above
// is preprocessed out; presumably defined just to keep this translation unit
// non-empty -- TODO confirm.
deUint32 rayTracingDefineAnything()
{
	return 0;
}
3834 
3835 #endif // CTS_USES_VULKANSC
3836 
3837 } // vk
3838