• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*-------------------------------------------------------------------------
2  * Vulkan CTS Framework
3  * --------------------
4  *
5  * Copyright (c) 2020 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Utilities for creating commonly used Vulkan objects
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vkRayTracingUtil.hpp"
25 
26 #include "vkRefUtil.hpp"
27 #include "vkQueryUtil.hpp"
28 #include "vkObjUtil.hpp"
29 #include "vkBarrierUtil.hpp"
30 #include "vkCmdUtil.hpp"
31 
32 #include "deStringUtil.hpp"
33 #include "deSTLUtil.hpp"
34 
#include <algorithm>
#include <limits>
#include <map>
#include <string>
#include <thread>
#include <type_traits>
#include <vector>
41 
42 namespace vk
43 {
44 
45 #ifndef CTS_USES_VULKANSC
46 
// Long-running loops touch the test watchdog every N iterations to avoid timeouts.
static const deUint32 WATCHDOG_INTERVAL = 16384; // Touch watchDog every N iterations.
48 
// Parameter block handed to each worker thread that joins a deferred operation.
struct DeferredThreadParams
{
	const DeviceInterface&	vk;					// Device driver interface used for the join calls.
	VkDevice				device;				// Device owning the deferred operation.
	VkDeferredOperationKHR	deferredOperation;	// Operation being joined by the worker thread.
	VkResult				result;				// Output: final result observed by this thread.
};
56 
getFormatSimpleName(vk::VkFormat format)57 std::string getFormatSimpleName (vk::VkFormat format)
58 {
59 	constexpr size_t kPrefixLen = 10; // strlen("VK_FORMAT_")
60 	return de::toLower(de::toString(format).substr(kPrefixLen));
61 }
62 
pointInTriangle2D(const tcu::Vec3 & p,const tcu::Vec3 & p0,const tcu::Vec3 & p1,const tcu::Vec3 & p2)63 bool pointInTriangle2D(const tcu::Vec3& p, const tcu::Vec3& p0, const tcu::Vec3& p1, const tcu::Vec3& p2)
64 {
65 	float s = p0.y() * p2.x() - p0.x() * p2.y() + (p2.y() - p0.y()) * p.x() + (p0.x() - p2.x()) * p.y();
66 	float t = p0.x() * p1.y() - p0.y() * p1.x() + (p0.y() - p1.y()) * p.x() + (p1.x() - p0.x()) * p.y();
67 
68 	if ((s < 0) != (t < 0))
69 		return false;
70 
71 	float a = -p1.y() * p2.x() + p0.y() * (p2.x() - p1.x()) + p0.x() * (p1.y() - p2.y()) + p1.x() * p2.y();
72 
73 	return a < 0 ?
74 		(s <= 0 && s + t >= a) :
75 		(s >= 0 && s + t <= a);
76 }
77 
78 // Returns true if VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR needs to be supported for the given format.
isMandatoryAccelerationStructureVertexBufferFormat(vk::VkFormat format)79 static bool isMandatoryAccelerationStructureVertexBufferFormat (vk::VkFormat format)
80 {
81 	bool mandatory = false;
82 
83 	switch (format)
84 	{
85     case VK_FORMAT_R32G32_SFLOAT:
86     case VK_FORMAT_R32G32B32_SFLOAT:
87     case VK_FORMAT_R16G16_SFLOAT:
88     case VK_FORMAT_R16G16B16A16_SFLOAT:
89     case VK_FORMAT_R16G16_SNORM:
90     case VK_FORMAT_R16G16B16A16_SNORM:
91 		mandatory = true;
92 		break;
93 	default:
94 		break;
95 	}
96 
97 	return mandatory;
98 }
99 
checkAccelerationStructureVertexBufferFormat(const vk::InstanceInterface & vki,vk::VkPhysicalDevice physicalDevice,vk::VkFormat format)100 void checkAccelerationStructureVertexBufferFormat (const vk::InstanceInterface &vki, vk::VkPhysicalDevice physicalDevice, vk::VkFormat format)
101 {
102 	const vk::VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(vki, physicalDevice, format);
103 
104 	if ((formatProperties.bufferFeatures & vk::VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR) == 0u)
105 	{
106 		const std::string errorMsg = "Format not supported for acceleration structure vertex buffers";
107 		if (isMandatoryAccelerationStructureVertexBufferFormat(format))
108 			TCU_FAIL(errorMsg);
109 		TCU_THROW(NotSupportedError, errorMsg);
110 	}
111 }
112 
// Returns the GLSL source of a basic ray generation shader shared by many tests:
// one ray per launch cell, cast from the cell center at Z = 0 toward -Z.
std::string getCommonRayGenerationShader (void)
{
	std::string src;

	src += "#version 460 core\n";
	src += "#extension GL_EXT_ray_tracing : require\n";
	src += "layout(location = 0) rayPayloadEXT vec3 hitValue;\n";
	src += "layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n";
	src += "\n";
	src += "void main()\n";
	src += "{\n";
	src += "  uint  rayFlags = 0;\n";
	src += "  uint  cullMask = 0xFF;\n";
	src += "  float tmin     = 0.0;\n";
	src += "  float tmax     = 9.0;\n";
	src += "  vec3  origin   = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n";
	src += "  vec3  direct   = vec3(0.0, 0.0, -1.0);\n";
	src += "  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, 0);\n";
	src += "}\n";

	return src;
}
132 
// Base geometry description: records the geometry kind, vertex format and index
// type; flags default to none and no opacity micromap is attached.
RaytracedGeometryBase::RaytracedGeometryBase (VkGeometryTypeKHR geometryType, VkFormat vertexFormat, VkIndexType indexType)
	: m_geometryType	(geometryType)
	, m_vertexFormat	(vertexFormat)
	, m_indexType		(indexType)
	, m_geometryFlags	((VkGeometryFlagsKHR)0u)
	, m_hasOpacityMicromap (false)
{
	// AABB geometry in this utility is only supported with 32-bit float min/max corners.
	if (m_geometryType == VK_GEOMETRY_TYPE_AABBS_KHR)
		DE_ASSERT(m_vertexFormat == VK_FORMAT_R32G32B32_SFLOAT);
}
143 
// Out-of-line destructor; nothing to release explicitly.
RaytracedGeometryBase::~RaytracedGeometryBase ()
{
}
147 
// Parameters forwarded by makeRaytracedGeometry to the typed geometry builder.
struct GeometryBuilderParams
{
	VkGeometryTypeKHR	geometryType;	// Triangles or AABBs.
	bool				usePadding;		// Whether vertex storage should be padded.
};
153 
154 template <typename V, typename I>
buildRaytracedGeometry(const GeometryBuilderParams & params)155 RaytracedGeometryBase* buildRaytracedGeometry (const GeometryBuilderParams& params)
156 {
157 	return new RaytracedGeometry<V, I>(params.geometryType, (params.usePadding ? 1u : 0u));
158 }
159 
makeRaytracedGeometry(VkGeometryTypeKHR geometryType,VkFormat vertexFormat,VkIndexType indexType,bool padVertices)160 de::SharedPtr<RaytracedGeometryBase> makeRaytracedGeometry (VkGeometryTypeKHR geometryType, VkFormat vertexFormat, VkIndexType indexType, bool padVertices)
161 {
162 	const GeometryBuilderParams builderParams { geometryType, padVertices };
163 
164 	switch (vertexFormat)
165 	{
166 		case VK_FORMAT_R32G32_SFLOAT:
167 			switch (indexType)
168 			{
169 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, deUint16>(builderParams));
170 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, deUint32>(builderParams));
171 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, EmptyIndex>(builderParams));
172 				default:						TCU_THROW(InternalError, "Wrong index type");
173 			}
174 		case VK_FORMAT_R32G32B32_SFLOAT:
175 			switch (indexType)
176 			{
177 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, deUint16>(builderParams));
178 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, deUint32>(builderParams));
179 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, EmptyIndex>(builderParams));
180 				default:						TCU_THROW(InternalError, "Wrong index type");
181 			}
182 		case VK_FORMAT_R32G32B32A32_SFLOAT:
183 			switch (indexType)
184 			{
185 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, deUint16>(builderParams));
186 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, deUint32>(builderParams));
187 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, EmptyIndex>(builderParams));
188 				default:						TCU_THROW(InternalError, "Wrong index type");
189 			}
190 		case VK_FORMAT_R16G16_SFLOAT:
191 			switch (indexType)
192 			{
193 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, deUint16>(builderParams));
194 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, deUint32>(builderParams));
195 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, EmptyIndex>(builderParams));
196 				default:						TCU_THROW(InternalError, "Wrong index type");
197 			}
198 		case VK_FORMAT_R16G16B16_SFLOAT:
199 			switch (indexType)
200 			{
201 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, deUint16>(builderParams));
202 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, deUint32>(builderParams));
203 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, EmptyIndex>(builderParams));
204 				default:						TCU_THROW(InternalError, "Wrong index type");
205 			}
206 		case VK_FORMAT_R16G16B16A16_SFLOAT:
207 			switch (indexType)
208 			{
209 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, deUint16>(builderParams));
210 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, deUint32>(builderParams));
211 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, EmptyIndex>(builderParams));
212 				default:						TCU_THROW(InternalError, "Wrong index type");
213 			}
214 		case VK_FORMAT_R16G16_SNORM:
215 			switch (indexType)
216 			{
217 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, deUint16>(builderParams));
218 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, deUint32>(builderParams));
219 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, EmptyIndex>(builderParams));
220 				default:						TCU_THROW(InternalError, "Wrong index type");
221 			}
222 		case VK_FORMAT_R16G16B16_SNORM:
223 			switch (indexType)
224 			{
225 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, deUint16>(builderParams));
226 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, deUint32>(builderParams));
227 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, EmptyIndex>(builderParams));
228 				default:						TCU_THROW(InternalError, "Wrong index type");
229 			}
230 		case VK_FORMAT_R16G16B16A16_SNORM:
231 			switch (indexType)
232 			{
233 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, deUint16>(builderParams));
234 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, deUint32>(builderParams));
235 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, EmptyIndex>(builderParams));
236 				default:						TCU_THROW(InternalError, "Wrong index type");
237 			}
238 		case VK_FORMAT_R64G64_SFLOAT:
239 			switch (indexType)
240 			{
241 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, deUint16>(builderParams));
242 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, deUint32>(builderParams));
243 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, EmptyIndex>(builderParams));
244 				default:						TCU_THROW(InternalError, "Wrong index type");
245 			}
246 		case VK_FORMAT_R64G64B64_SFLOAT:
247 			switch (indexType)
248 			{
249 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, deUint16>(builderParams));
250 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, deUint32>(builderParams));
251 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, EmptyIndex>(builderParams));
252 				default:						TCU_THROW(InternalError, "Wrong index type");
253 			}
254 		case VK_FORMAT_R64G64B64A64_SFLOAT:
255 			switch (indexType)
256 			{
257 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, deUint16>(builderParams));
258 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, deUint32>(builderParams));
259 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, EmptyIndex>(builderParams));
260 				default:						TCU_THROW(InternalError, "Wrong index type");
261 			}
262 		case VK_FORMAT_R8G8_SNORM:
263 			switch (indexType)
264 			{
265 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, deUint16>(builderParams));
266 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, deUint32>(builderParams));
267 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, EmptyIndex>(builderParams));
268 				default:						TCU_THROW(InternalError, "Wrong index type");
269 			}
270 		case VK_FORMAT_R8G8B8_SNORM:
271 			switch (indexType)
272 			{
273 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, deUint16>(builderParams));
274 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, deUint32>(builderParams));
275 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, EmptyIndex>(builderParams));
276 				default:						TCU_THROW(InternalError, "Wrong index type");
277 			}
278 		case VK_FORMAT_R8G8B8A8_SNORM:
279 			switch (indexType)
280 			{
281 				case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, deUint16>(builderParams));
282 				case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, deUint32>(builderParams));
283 				case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, EmptyIndex>(builderParams));
284 				default:						TCU_THROW(InternalError, "Wrong index type");
285 			}
286 		default:
287 			TCU_THROW(InternalError, "Wrong vertex format");
288 	}
289 
290 }
291 
getBufferDeviceAddress(const DeviceInterface & vk,const VkDevice device,const VkBuffer buffer,VkDeviceSize offset)292 VkDeviceAddress getBufferDeviceAddress ( const DeviceInterface&	vk,
293 										 const VkDevice			device,
294 										 const VkBuffer			buffer,
295 										 VkDeviceSize			offset )
296 {
297 
298 	if (buffer == DE_NULL)
299 		return 0;
300 
301 	VkBufferDeviceAddressInfo deviceAddressInfo
302 	{
303 		VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,		// VkStructureType    sType
304 		DE_NULL,											// const void*        pNext
305 		buffer												// VkBuffer           buffer;
306 	};
307 	return vk.getBufferDeviceAddress(device, &deviceAddressInfo) + offset;
308 }
309 
310 
makeQueryPool(const DeviceInterface & vk,const VkDevice device,const VkQueryType queryType,deUint32 queryCount)311 static inline Move<VkQueryPool> makeQueryPool (const DeviceInterface&		vk,
312 											   const VkDevice				device,
313 											   const VkQueryType			queryType,
314 											   deUint32					queryCount)
315 {
316 	const VkQueryPoolCreateInfo				queryPoolCreateInfo =
317 	{
318 		VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,		// sType
319 		DE_NULL,										// pNext
320 		(VkQueryPoolCreateFlags)0,						// flags
321 		queryType,										// queryType
322 		queryCount,										// queryCount
323 		0u,												// pipelineStatistics
324 	};
325 	return createQueryPool(vk, device, &queryPoolCreateInfo);
326 }
327 
makeVkAccelerationStructureGeometryDataKHR(const VkAccelerationStructureGeometryTrianglesDataKHR & triangles)328 static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureGeometryDataKHR (const VkAccelerationStructureGeometryTrianglesDataKHR& triangles)
329 {
330 	VkAccelerationStructureGeometryDataKHR result;
331 
332 	deMemset(&result, 0, sizeof(result));
333 
334 	result.triangles = triangles;
335 
336 	return result;
337 }
338 
makeVkAccelerationStructureGeometryDataKHR(const VkAccelerationStructureGeometryAabbsDataKHR & aabbs)339 static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureGeometryDataKHR (const VkAccelerationStructureGeometryAabbsDataKHR& aabbs)
340 {
341 	VkAccelerationStructureGeometryDataKHR result;
342 
343 	deMemset(&result, 0, sizeof(result));
344 
345 	result.aabbs = aabbs;
346 
347 	return result;
348 }
349 
makeVkAccelerationStructureInstancesDataKHR(const VkAccelerationStructureGeometryInstancesDataKHR & instances)350 static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureInstancesDataKHR (const VkAccelerationStructureGeometryInstancesDataKHR& instances)
351 {
352 	VkAccelerationStructureGeometryDataKHR result;
353 
354 	deMemset(&result, 0, sizeof(result));
355 
356 	result.instances = instances;
357 
358 	return result;
359 }
360 
makeVkAccelerationStructureInstanceKHR(const VkTransformMatrixKHR & transform,deUint32 instanceCustomIndex,deUint32 mask,deUint32 instanceShaderBindingTableRecordOffset,VkGeometryInstanceFlagsKHR flags,deUint64 accelerationStructureReference)361 static inline VkAccelerationStructureInstanceKHR makeVkAccelerationStructureInstanceKHR (const VkTransformMatrixKHR&			transform,
362 																						 deUint32								instanceCustomIndex,
363 																						 deUint32								mask,
364 																						 deUint32								instanceShaderBindingTableRecordOffset,
365 																						 VkGeometryInstanceFlagsKHR				flags,
366 																						 deUint64								accelerationStructureReference)
367 {
368 	VkAccelerationStructureInstanceKHR instance		= { transform, 0, 0, 0, 0, accelerationStructureReference };
369 	instance.instanceCustomIndex					= instanceCustomIndex & 0xFFFFFF;
370 	instance.mask									= mask & 0xFF;
371 	instance.instanceShaderBindingTableRecordOffset	= instanceShaderBindingTableRecordOffset & 0xFFFFFF;
372 	instance.flags									= flags & 0xFF;
373 	return instance;
374 }
375 
// Thin wrapper over vkGetRayTracingShaderGroupHandlesKHR; copies the handles
// of [firstGroup, firstGroup + groupCount) into pData and returns the driver's
// VkResult unchanged.
VkResult getRayTracingShaderGroupHandlesKHR (const DeviceInterface&		vk,
											 const VkDevice				device,
											 const VkPipeline			pipeline,
											 const deUint32				firstGroup,
											 const deUint32				groupCount,
											 const deUintptr			dataSize,
											 void*						pData)
{
	return vk.getRayTracingShaderGroupHandlesKHR(device, pipeline, firstGroup, groupCount, dataSize, pData);
}
386 
// Convenience alias that forwards to the KHR entry point above.
VkResult getRayTracingShaderGroupHandles (const DeviceInterface&		vk,
										  const VkDevice				device,
										  const VkPipeline				pipeline,
										  const deUint32				firstGroup,
										  const deUint32				groupCount,
										  const deUintptr				dataSize,
										  void*							pData)
{
	return getRayTracingShaderGroupHandlesKHR(vk, device, pipeline, firstGroup, groupCount, dataSize, pData);
}
397 
// Thin wrapper over vkGetRayTracingCaptureReplayShaderGroupHandlesKHR; returns
// the driver's VkResult unchanged.
VkResult getRayTracingCaptureReplayShaderGroupHandles (const DeviceInterface&	vk,
													   const VkDevice			device,
													   const VkPipeline			pipeline,
													   const deUint32			firstGroup,
													   const deUint32			groupCount,
													   const deUintptr			dataSize,
													   void*					pData)
{
	return vk.getRayTracingCaptureReplayShaderGroupHandlesKHR(device, pipeline, firstGroup, groupCount, dataSize, pData);
}
408 
finishDeferredOperation(const DeviceInterface & vk,VkDevice device,VkDeferredOperationKHR deferredOperation)409 VkResult finishDeferredOperation (const DeviceInterface&	vk,
410 								  VkDevice					device,
411 								  VkDeferredOperationKHR	deferredOperation)
412 {
413 	VkResult result = vk.deferredOperationJoinKHR(device, deferredOperation);
414 
415 	while (result == VK_THREAD_IDLE_KHR)
416 	{
417 		std::this_thread::yield();
418 		result = vk.deferredOperationJoinKHR(device, deferredOperation);
419 	}
420 
421 	switch( result )
422 	{
423 		case VK_SUCCESS:
424 		{
425 			// Deferred operation has finished. Query its result
426 			result = vk.getDeferredOperationResultKHR(device, deferredOperation);
427 
428 			break;
429 		}
430 
431 		case VK_THREAD_DONE_KHR:
432 		{
433 			// Deferred operation is being wrapped up by another thread
434 			// wait for that thread to finish
435 			do
436 			{
437 				std::this_thread::yield();
438 				result = vk.getDeferredOperationResultKHR(device, deferredOperation);
439 			} while (result == VK_NOT_READY);
440 
441 			break;
442 		}
443 
444 		default:
445 		{
446 			DE_ASSERT(false);
447 
448 			break;
449 		}
450 	}
451 
452 	return result;
453 }
454 
// Thread entry point: joins the deferred operation and stores the final result
// back into the caller-owned parameter block.
void finishDeferredOperationThreaded (DeferredThreadParams* deferredThreadParams)
{
	deferredThreadParams->result = finishDeferredOperation(deferredThreadParams->vk, deferredThreadParams->device, deferredThreadParams->deferredOperation);
}
459 
finishDeferredOperation(const DeviceInterface & vk,VkDevice device,VkDeferredOperationKHR deferredOperation,const deUint32 workerThreadCount,const bool operationNotDeferred)460 void finishDeferredOperation (const DeviceInterface&	vk,
461 							  VkDevice					device,
462 							  VkDeferredOperationKHR	deferredOperation,
463 							  const deUint32			workerThreadCount,
464 							  const bool				operationNotDeferred)
465 {
466 
467 	if (operationNotDeferred)
468 	{
469 		// when the operation deferral returns VK_OPERATION_NOT_DEFERRED_KHR,
470 		// the deferred operation should act as if no command was deferred
471 		VK_CHECK(vk.getDeferredOperationResultKHR(device, deferredOperation));
472 
473 
474 		// there is not need to join any threads to the deferred operation,
475 		// so below can be skipped.
476 		return;
477 	}
478 
479 	if (workerThreadCount == 0)
480 	{
481 		VK_CHECK(finishDeferredOperation(vk, device, deferredOperation));
482 	}
483 	else
484 	{
485 		const deUint32							maxThreadCountSupported	= deMinu32(256u, vk.getDeferredOperationMaxConcurrencyKHR(device, deferredOperation));
486 		const deUint32							requestedThreadCount	= workerThreadCount;
487 		const deUint32							testThreadCount			= requestedThreadCount == std::numeric_limits<deUint32>::max() ? maxThreadCountSupported : requestedThreadCount;
488 
489 		if (maxThreadCountSupported == 0)
490 			TCU_FAIL("vkGetDeferredOperationMaxConcurrencyKHR must not return 0");
491 
492 		const DeferredThreadParams				deferredThreadParams	=
493 		{
494 			vk,					//  const DeviceInterface&	vk;
495 			device,				//  VkDevice				device;
496 			deferredOperation,	//  VkDeferredOperationKHR	deferredOperation;
497 			VK_RESULT_MAX_ENUM,	//  VResult					result;
498 		};
499 		std::vector<DeferredThreadParams>		threadParams	(testThreadCount, deferredThreadParams);
500 		std::vector<de::MovePtr<std::thread> >	threads			(testThreadCount);
501 		bool									executionResult	= false;
502 
503 		DE_ASSERT(threads.size() > 0 && threads.size() == testThreadCount);
504 
505 		for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
506 			threads[threadNdx] = de::MovePtr<std::thread>(new std::thread(finishDeferredOperationThreaded, &threadParams[threadNdx]));
507 
508 		for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
509 			threads[threadNdx]->join();
510 
511 		for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
512 			if (threadParams[threadNdx].result == VK_SUCCESS)
513 				executionResult = true;
514 
515 		if (!executionResult)
516 			TCU_FAIL("Neither reported VK_SUCCESS");
517 	}
518 }
519 
// Allocates a host-visible, device-addressable buffer of the given size to hold
// a serialized acceleration structure.
SerialStorage::SerialStorage (const DeviceInterface&									vk,
							  const VkDevice											device,
							  Allocator&												allocator,
							  const VkAccelerationStructureBuildTypeKHR					buildType,
							  const VkDeviceSize										storageSize)
	: m_buildType		(buildType)
	, m_storageSize		(storageSize)
	, m_serialInfo		()
{
	const VkBufferCreateInfo	bufferCreateInfo	= makeBufferCreateInfo(storageSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
	try
	{
		// Prefer cached host memory first; not all implementations support this combination.
		m_buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
	}
	catch (const tcu::NotSupportedError&)
	{
		// retry without Cached flag
		m_buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
	}
}
540 
SerialStorage(const DeviceInterface & vk,const VkDevice device,Allocator & allocator,const VkAccelerationStructureBuildTypeKHR buildType,const SerialInfo & serialInfo)541 SerialStorage::SerialStorage (const DeviceInterface&						vk,
542 							  const VkDevice								device,
543 							  Allocator&									allocator,
544 							  const VkAccelerationStructureBuildTypeKHR		buildType,
545 							  const SerialInfo&								serialInfo)
546 	: m_buildType		(buildType)
547 	, m_storageSize		(serialInfo.sizes()[0])	// raise assertion if serialInfo is empty
548 	, m_serialInfo		(serialInfo)
549 {
550 	DE_ASSERT(serialInfo.sizes().size() >= 2u);
551 
552 	// create buffer for top-level acceleration structure
553 	{
554 		const VkBufferCreateInfo	bufferCreateInfo	= makeBufferCreateInfo(m_storageSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
555 		m_buffer										= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
556 	}
557 
558 	// create buffers for bottom-level acceleration structures
559 	{
560 		std::vector<deUint64>	addrs;
561 
562 		for (std::size_t i = 1; i < serialInfo.addresses().size(); ++i)
563 		{
564 			const deUint64& lookAddr = serialInfo.addresses()[i];
565 			auto end = addrs.end();
566 			auto match = std::find_if(addrs.begin(), end, [&](const deUint64& item){ return item == lookAddr; });
567 			if (match == end)
568 			{
569 				addrs.emplace_back(lookAddr);
570 				m_bottoms.emplace_back(de::SharedPtr<SerialStorage>(new SerialStorage(vk, device, allocator, buildType, serialInfo.sizes()[i])));
571 			}
572 		}
573 	}
574 }
575 
getAddress(const DeviceInterface & vk,const VkDevice device,const VkAccelerationStructureBuildTypeKHR buildType)576 VkDeviceOrHostAddressKHR SerialStorage::getAddress (const DeviceInterface&						vk,
577 													const VkDevice								device,
578 													const VkAccelerationStructureBuildTypeKHR	buildType)
579 {
580 	if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
581 		return makeDeviceOrHostAddressKHR(vk, device, m_buffer->get(), 0);
582 	else
583 		return makeDeviceOrHostAddressKHR(m_buffer->getAllocation().getHostPtr());
584 }
585 
// Interprets the start of the mapped storage buffer as the serialized
// acceleration structure header.
SerialStorage::AccelerationStructureHeader* SerialStorage::getASHeader ()
{
	return reinterpret_cast<AccelerationStructureHeader*>(getHostAddress().hostAddress);
}
590 
hasDeepFormat() const591 bool SerialStorage::hasDeepFormat () const
592 {
593 	return (m_serialInfo.sizes().size() >= 2u);
594 }
595 
// Returns the storage for the index-th unique bottom-level structure.
// No bounds checking is performed; the caller must pass a valid index.
de::SharedPtr<SerialStorage> SerialStorage::getBottomStorage (deUint32 index) const
{
	return m_bottoms[index];
}
600 
// Returns the mapped host pointer into the storage buffer at the given byte
// offset (must stay within the allocated storage).
VkDeviceOrHostAddressKHR SerialStorage::getHostAddress (VkDeviceSize offset)
{
	DE_ASSERT(offset < m_storageSize);
	return makeDeviceOrHostAddressKHR(static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr()) + offset);
}
606 
// Const variant of getHostAddress; note it performs no bounds assertion.
VkDeviceOrHostAddressConstKHR SerialStorage::getHostAddressConst (VkDeviceSize offset)
{
	return makeDeviceOrHostAddressConstKHR(static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr()) + offset);
}
611 
getAddressConst(const DeviceInterface & vk,const VkDevice device,const VkAccelerationStructureBuildTypeKHR buildType)612 VkDeviceOrHostAddressConstKHR SerialStorage::getAddressConst (const DeviceInterface&					vk,
613 															  const VkDevice							device,
614 															  const VkAccelerationStructureBuildTypeKHR	buildType)
615 {
616 	if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
617 		return makeDeviceOrHostAddressConstKHR(vk, device, m_buffer->get(), 0);
618 	else
619 		return getHostAddressConst();
620 }
621 
// Size in bytes of the top-level serialization buffer.
inline VkDeviceSize SerialStorage::getStorageSize () const
{
	return m_storageSize;
}
626 
// Serialization info this storage was created from (empty for the size-only constructor).
inline const SerialInfo& SerialStorage::getSerialInfo () const
{
	return m_serialInfo;
}
631 
// Reads the deserialized-size field out of the serialized acceleration
// structure header stored in the host-visible buffer.
deUint64 SerialStorage::getDeserializedSize ()
{
	deUint64		result		= 0;
	const deUint8*	startPtr	= static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr());

	// The header field width must match the variable we copy into.
	DE_ASSERT(sizeof(result) == DESERIALIZED_SIZE_SIZE);

	deMemcpy(&result, startPtr + DESERIALIZED_SIZE_OFFSET, sizeof(result));

	return result;
}
643 
// Out-of-line destructor; nothing to release explicitly.
BottomLevelAccelerationStructure::~BottomLevelAccelerationStructure ()
{
}
647 
// Sizes start at zero; they are filled in once the structure is created/built.
BottomLevelAccelerationStructure::BottomLevelAccelerationStructure ()
	: m_structureSize		(0u)
	, m_updateScratchSize	(0u)
	, m_buildScratchSize	(0u)
{
}
654 
setGeometryData(const std::vector<tcu::Vec3> & geometryData,const bool triangles,const VkGeometryFlagsKHR geometryFlags)655 void BottomLevelAccelerationStructure::setGeometryData (const std::vector<tcu::Vec3>&	geometryData,
656 														const bool						triangles,
657 														const VkGeometryFlagsKHR		geometryFlags)
658 {
659 	if (triangles)
660 		DE_ASSERT((geometryData.size() % 3) == 0);
661 	else
662 		DE_ASSERT((geometryData.size() % 2) == 0);
663 
664 	setGeometryCount(1u);
665 
666 	addGeometry(geometryData, triangles, geometryFlags);
667 }
668 
setDefaultGeometryData(const VkShaderStageFlagBits testStage,const VkGeometryFlagsKHR geometryFlags)669 void BottomLevelAccelerationStructure::setDefaultGeometryData (const VkShaderStageFlagBits	testStage,
670 															   const VkGeometryFlagsKHR		geometryFlags)
671 {
672 	bool					trianglesData	= false;
673 	float					z				= 0.0f;
674 	std::vector<tcu::Vec3>	geometryData;
675 
676 	switch (testStage)
677 	{
678 		case VK_SHADER_STAGE_RAYGEN_BIT_KHR:		z = -1.0f; trianglesData = true;	break;
679 		case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:		z = -1.0f; trianglesData = true;	break;
680 		case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:	z = -1.0f; trianglesData = true;	break;
681 		case VK_SHADER_STAGE_MISS_BIT_KHR:			z = -9.9f; trianglesData = true;	break;
682 		case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:	z = -1.0f; trianglesData = false;	break;
683 		case VK_SHADER_STAGE_CALLABLE_BIT_KHR:		z = -1.0f; trianglesData = true;	break;
684 		default:									TCU_THROW(InternalError, "Unacceptable stage");
685 	}
686 
687 	if (trianglesData)
688 	{
689 		geometryData.reserve(6);
690 
691 		geometryData.push_back(tcu::Vec3(-1.0f, -1.0f, z));
692 		geometryData.push_back(tcu::Vec3(-1.0f, +1.0f, z));
693 		geometryData.push_back(tcu::Vec3(+1.0f, -1.0f, z));
694 		geometryData.push_back(tcu::Vec3(+1.0f, -1.0f, z));
695 		geometryData.push_back(tcu::Vec3(-1.0f, +1.0f, z));
696 		geometryData.push_back(tcu::Vec3(+1.0f, +1.0f, z));
697 	}
698 	else
699 	{
700 		geometryData.reserve(2);
701 
702 		geometryData.push_back(tcu::Vec3(-1.0f, -1.0f, z));
703 		geometryData.push_back(tcu::Vec3(+1.0f, +1.0f, z));
704 	}
705 
706 	setGeometryCount(1u);
707 
708 	addGeometry(geometryData, trianglesData, geometryFlags);
709 }
710 
// Discards any previously added geometries and reserves room for geometryCount
// entries; geometries themselves are added afterwards via addGeometry().
void BottomLevelAccelerationStructure::setGeometryCount (const size_t geometryCount)
{
	m_geometriesData.clear();

	m_geometriesData.reserve(geometryCount);
}
717 
// Appends an already-constructed geometry to the structure's geometry list.
void BottomLevelAccelerationStructure::addGeometry (de::SharedPtr<RaytracedGeometryBase>&		raytracedGeometry)
{
	m_geometriesData.push_back(raytracedGeometry);
}
722 
addGeometry(const std::vector<tcu::Vec3> & geometryData,const bool triangles,const VkGeometryFlagsKHR geometryFlags,const VkAccelerationStructureTrianglesOpacityMicromapEXT * opacityGeometryMicromap)723 void BottomLevelAccelerationStructure::addGeometry (const std::vector<tcu::Vec3>&	geometryData,
724 													const bool						triangles,
725 													const VkGeometryFlagsKHR		geometryFlags,
726 													const VkAccelerationStructureTrianglesOpacityMicromapEXT* opacityGeometryMicromap)
727 {
728 	DE_ASSERT(geometryData.size() > 0);
729 	DE_ASSERT((triangles && geometryData.size() % 3 == 0) || (!triangles && geometryData.size() % 2 == 0));
730 
731 	if (!triangles)
732 		for (size_t posNdx = 0; posNdx < geometryData.size() / 2; ++posNdx)
733 		{
734 			DE_ASSERT(geometryData[2 * posNdx].x() <= geometryData[2 * posNdx + 1].x());
735 			DE_ASSERT(geometryData[2 * posNdx].y() <= geometryData[2 * posNdx + 1].y());
736 			DE_ASSERT(geometryData[2 * posNdx].z() <= geometryData[2 * posNdx + 1].z());
737 		}
738 
739 	de::SharedPtr<RaytracedGeometryBase> geometry = makeRaytracedGeometry(triangles ? VK_GEOMETRY_TYPE_TRIANGLES_KHR : VK_GEOMETRY_TYPE_AABBS_KHR, VK_FORMAT_R32G32B32_SFLOAT, VK_INDEX_TYPE_NONE_KHR);
740 	for (auto it = begin(geometryData), eit = end(geometryData); it != eit; ++it)
741 		geometry->addVertex(*it);
742 
743 	geometry->setGeometryFlags(geometryFlags);
744 	if (opacityGeometryMicromap)
745 		geometry->setOpacityMicromap(opacityGeometryMicromap);
746 	addGeometry(geometry);
747 }
748 
getStructureBuildSizes() const749 VkAccelerationStructureBuildSizesInfoKHR BottomLevelAccelerationStructure::getStructureBuildSizes () const
750 {
751 	return
752 	{
753 		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,	//  VkStructureType	sType;
754 		DE_NULL,														//  const void*		pNext;
755 		m_structureSize,												//  VkDeviceSize	accelerationStructureSize;
756 		m_updateScratchSize,											//  VkDeviceSize	updateScratchSize;
757 		m_buildScratchSize												//  VkDeviceSize	buildScratchSize;
758 	};
759 };
760 
getVertexBufferSize(const std::vector<de::SharedPtr<RaytracedGeometryBase>> & geometriesData)761 VkDeviceSize getVertexBufferSize (const std::vector<de::SharedPtr<RaytracedGeometryBase>>&	geometriesData)
762 {
763 	DE_ASSERT(geometriesData.size() != 0);
764 	VkDeviceSize					bufferSizeBytes = 0;
765 	for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
766 		bufferSizeBytes += deAlignSize(geometriesData[geometryNdx]->getVertexByteSize(),8);
767 	return bufferSizeBytes;
768 }
769 
// Creates a host-visible, device-addressable vertex buffer suitable as AS
// build input. Caller takes ownership of the returned object.
BufferWithMemory* createVertexBuffer (const DeviceInterface&	vk,
									  const VkDevice			device,
									  Allocator&				allocator,
									  const VkDeviceSize		bufferSizeBytes)
{
	const VkBufferCreateInfo	bufferCreateInfo	= makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
	return new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
}
778 
// Convenience overload: sizes the vertex buffer from the geometry list.
// Caller takes ownership of the returned object.
BufferWithMemory* createVertexBuffer (const DeviceInterface&									vk,
									  const VkDevice											device,
									  Allocator&												allocator,
									  const std::vector<de::SharedPtr<RaytracedGeometryBase>>&	geometriesData)
{
	return createVertexBuffer(vk, device, allocator, getVertexBufferSize(geometriesData));
}
786 
updateVertexBuffer(const DeviceInterface & vk,const VkDevice device,const std::vector<de::SharedPtr<RaytracedGeometryBase>> & geometriesData,BufferWithMemory * vertexBuffer,VkDeviceSize geometriesOffset=0)787 void updateVertexBuffer (const DeviceInterface&										vk,
788 						 const VkDevice												device,
789 						 const std::vector<de::SharedPtr<RaytracedGeometryBase>>&	geometriesData,
790 						 BufferWithMemory*											vertexBuffer,
791 						 VkDeviceSize												geometriesOffset = 0)
792 {
793 	const Allocation&				geometryAlloc		= vertexBuffer->getAllocation();
794 	deUint8*						bufferStart			= static_cast<deUint8*>(geometryAlloc.getHostPtr());
795 	VkDeviceSize					bufferOffset		= geometriesOffset;
796 
797 	for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
798 	{
799 		const void*					geometryPtr			= geometriesData[geometryNdx]->getVertexPointer();
800 		const size_t				geometryPtrSize		= geometriesData[geometryNdx]->getVertexByteSize();
801 
802 		deMemcpy(&bufferStart[bufferOffset], geometryPtr, geometryPtrSize);
803 
804 		bufferOffset += deAlignSize(geometryPtrSize,8);
805 	}
806 
807 	// Flush the whole allocation. We could flush only the interesting range, but we'd need to be sure both the offset and size
808 	// align to VkPhysicalDeviceLimits::nonCoherentAtomSize, which we are not considering. Also note most code uses Coherent memory
809 	// for the vertex and index buffers, so flushing is actually not needed.
810 	flushAlloc(vk, device, geometryAlloc);
811 }
812 
getIndexBufferSize(const std::vector<de::SharedPtr<RaytracedGeometryBase>> & geometriesData)813 VkDeviceSize getIndexBufferSize (const std::vector<de::SharedPtr<RaytracedGeometryBase>>&	geometriesData)
814 {
815 	DE_ASSERT(!geometriesData.empty());
816 
817 	VkDeviceSize	bufferSizeBytes = 0;
818 	for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
819 		if(geometriesData[geometryNdx]->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
820 			bufferSizeBytes += deAlignSize(geometriesData[geometryNdx]->getIndexByteSize(),8);
821 	return bufferSizeBytes;
822 }
823 
// Creates a host-visible, device-addressable index buffer suitable as AS
// build input. Size must be non-zero; caller takes ownership of the result.
BufferWithMemory* createIndexBuffer (const DeviceInterface&		vk,
									 const VkDevice				device,
									 Allocator&					allocator,
									 const VkDeviceSize			bufferSizeBytes)
{
	DE_ASSERT(bufferSizeBytes);
	const VkBufferCreateInfo		bufferCreateInfo	= makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
	return  new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
}
833 
createIndexBuffer(const DeviceInterface & vk,const VkDevice device,Allocator & allocator,const std::vector<de::SharedPtr<RaytracedGeometryBase>> & geometriesData)834 BufferWithMemory* createIndexBuffer (const DeviceInterface&										vk,
835 									 const VkDevice												device,
836 									 Allocator&													allocator,
837 									 const std::vector<de::SharedPtr<RaytracedGeometryBase>>&	geometriesData)
838 {
839 	const VkDeviceSize bufferSizeBytes = getIndexBufferSize(geometriesData);
840 	return bufferSizeBytes ? createIndexBuffer(vk, device, allocator, bufferSizeBytes) : nullptr;
841 }
842 
updateIndexBuffer(const DeviceInterface & vk,const VkDevice device,const std::vector<de::SharedPtr<RaytracedGeometryBase>> & geometriesData,BufferWithMemory * indexBuffer,VkDeviceSize geometriesOffset)843 void updateIndexBuffer (const DeviceInterface&										vk,
844 						const VkDevice												device,
845 						const std::vector<de::SharedPtr<RaytracedGeometryBase>>&	geometriesData,
846 						BufferWithMemory*											indexBuffer,
847 						VkDeviceSize												geometriesOffset)
848 {
849 	const Allocation&				indexAlloc			= indexBuffer->getAllocation();
850 	deUint8*						bufferStart			= static_cast<deUint8*>(indexAlloc.getHostPtr());
851 	VkDeviceSize					bufferOffset		= geometriesOffset;
852 
853 	for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
854 	{
855 		if (geometriesData[geometryNdx]->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
856 		{
857 			const void*					indexPtr		= geometriesData[geometryNdx]->getIndexPointer();
858 			const size_t				indexPtrSize	= geometriesData[geometryNdx]->getIndexByteSize();
859 
860 			deMemcpy(&bufferStart[bufferOffset], indexPtr, indexPtrSize);
861 
862 			bufferOffset += deAlignSize(indexPtrSize, 8);
863 		}
864 	}
865 
866 	// Flush the whole allocation. We could flush only the interesting range, but we'd need to be sure both the offset and size
867 	// align to VkPhysicalDeviceLimits::nonCoherentAtomSize, which we are not considering. Also note most code uses Coherent memory
868 	// for the vertex and index buffers, so flushing is actually not needed.
869 	flushAlloc(vk, device, indexAlloc);
870 }
871 
// VK_KHR_acceleration_structure implementation of the abstract
// BottomLevelAccelerationStructure interface. Owns the backing buffer, vertex
// and index input buffers, and the scratch memory (device buffer or host
// vector depending on the build type).
class BottomLevelAccelerationStructureKHR : public BottomLevelAccelerationStructure
{
public:
	// Upper bound on the number of device-memory allocations one instance performs.
	static deUint32											getRequiredAllocationCount						(void);

															BottomLevelAccelerationStructureKHR				();
															BottomLevelAccelerationStructureKHR				(const BottomLevelAccelerationStructureKHR&		other) = delete;
	virtual													~BottomLevelAccelerationStructureKHR			();

	// Configuration setters; the stored values are consumed by create()/build().
	void													setBuildType									(const VkAccelerationStructureBuildTypeKHR		buildType) override;
	VkAccelerationStructureBuildTypeKHR						getBuildType									() const override;
	void													setCreateFlags									(const VkAccelerationStructureCreateFlagsKHR	createFlags) override;
	void													setCreateGeneric								(bool											createGeneric) override;
	void													setCreationBufferUnbounded						(bool											creationBufferUnbounded) override;
	void													setBuildFlags									(const VkBuildAccelerationStructureFlagsKHR		buildFlags) override;
	void													setBuildWithoutGeometries						(bool											buildWithoutGeometries) override;
	void													setBuildWithoutPrimitives						(bool											buildWithoutPrimitives) override;
	void													setDeferredOperation							(const bool										deferredOperation,
																											 const deUint32									workerThreadCount) override;
	void													setUseArrayOfPointers							(const bool										useArrayOfPointers) override;
	void													setUseMaintenance5								(const bool										useMaintenance5) override;
	void													setIndirectBuildParameters						(const VkBuffer									indirectBuffer,
																											 const VkDeviceSize								indirectBufferOffset,
																											 const deUint32									indirectBufferStride) override;
	VkBuildAccelerationStructureFlagsKHR					getBuildFlags									() const override;

	// Lifecycle: create() allocates the AS object (and buffers), build()
	// records/performs the build, copyFrom()/serialize()/deserialize() handle
	// the copy and serialization paths.
	void													create											(const DeviceInterface&							vk,
																											 const VkDevice									device,
																											 Allocator&										allocator,
																											 VkDeviceSize									structureSize,
																											 VkDeviceAddress								deviceAddress			= 0u,
																											 const void*									pNext					= DE_NULL,
																											 const MemoryRequirement&						addMemoryRequirement	= MemoryRequirement::Any,
																											 const VkBuffer									creationBuffer			= VK_NULL_HANDLE,
																											 const VkDeviceSize								creationBufferSize		= 0u) override;
	void													build											(const DeviceInterface&							vk,
																											 const VkDevice									device,
																											 const VkCommandBuffer							cmdBuffer,
																											 BottomLevelAccelerationStructure*				srcAccelerationStructure = DE_NULL) override;
	void													copyFrom										(const DeviceInterface&							vk,
																											 const VkDevice									device,
																											 const VkCommandBuffer							cmdBuffer,
																											 BottomLevelAccelerationStructure*				accelerationStructure,
																											 bool											compactCopy) override;

	void													serialize										(const DeviceInterface&							vk,
																											 const VkDevice									device,
																											 const VkCommandBuffer							cmdBuffer,
																											 SerialStorage*									storage) override;
	void													deserialize										(const DeviceInterface&							vk,
																											 const VkDevice									device,
																											 const VkCommandBuffer							cmdBuffer,
																											 SerialStorage*									storage) override;

	const VkAccelerationStructureKHR*						getPtr											(void) const override;
	void													updateGeometry									(size_t											geometryIndex,
																											 de::SharedPtr<RaytracedGeometryBase>&			raytracedGeometry) override;

protected:
	// Configuration state captured by the setters above.
	VkAccelerationStructureBuildTypeKHR						m_buildType;
	VkAccelerationStructureCreateFlagsKHR					m_createFlags;
	bool													m_createGeneric;
	bool													m_creationBufferUnbounded;
	VkBuildAccelerationStructureFlagsKHR					m_buildFlags;
	bool													m_buildWithoutGeometries;
	bool													m_buildWithoutPrimitives;
	bool													m_deferredOperation;
	deUint32												m_workerThreadCount;
	bool													m_useArrayOfPointers;
	bool													m_useMaintenance5;
	// Owned resources created in create().
	de::MovePtr<BufferWithMemory>							m_accelerationStructureBuffer;
	de::MovePtr<BufferWithMemory>							m_vertexBuffer;
	de::MovePtr<BufferWithMemory>							m_indexBuffer;
	de::MovePtr<BufferWithMemory>							m_deviceScratchBuffer;
	de::UniquePtr<std::vector<deUint8>>						m_hostScratchBuffer;
	Move<VkAccelerationStructureKHR>						m_accelerationStructureKHR;
	// Indirect build parameters (not owned).
	VkBuffer												m_indirectBuffer;
	VkDeviceSize											m_indirectBufferOffset;
	deUint32												m_indirectBufferStride;

	// Fills the Vulkan geometry/range/count arrays from m_geometriesData.
	void													prepareGeometries								(const DeviceInterface&												vk,
																											 const VkDevice														device,
																											 std::vector<VkAccelerationStructureGeometryKHR>&					accelerationStructureGeometriesKHR,
																											 std::vector<VkAccelerationStructureGeometryKHR*>&					accelerationStructureGeometriesKHRPointers,
																											 std::vector<VkAccelerationStructureBuildRangeInfoKHR>&				accelerationStructureBuildRangeInfoKHR,
																											 std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT>&	accelerationStructureGeometryMicromapsEXT,
																											 std::vector<deUint32>&												maxPrimitiveCounts,
																											 VkDeviceSize														vertexBufferOffset = 0,
																											 VkDeviceSize														indexBufferOffset = 0) const;

	// Buffer/offset accessors; virtual so subclasses can redirect the structure
	// to other buffers or non-zero offsets — presumably for shared-buffer
	// setups (TODO confirm against subclasses outside this chunk).
	virtual BufferWithMemory*								getAccelerationStructureBuffer					() const { return m_accelerationStructureBuffer.get(); }
	virtual BufferWithMemory*								getDeviceScratchBuffer							() const { return m_deviceScratchBuffer.get(); }
	virtual std::vector<deUint8>*							getHostScratchBuffer							() const { return m_hostScratchBuffer.get(); }
	virtual BufferWithMemory*								getVertexBuffer									() const { return m_vertexBuffer.get(); }
	virtual BufferWithMemory*								getIndexBuffer									() const { return m_indexBuffer.get(); }

	virtual VkDeviceSize									getAccelerationStructureBufferOffset			() const { return 0; }
	virtual VkDeviceSize									getDeviceScratchBufferOffset					() const { return 0; }
	virtual VkDeviceSize									getVertexBufferOffset							() const { return 0; }
	virtual VkDeviceSize									getIndexBufferOffset							() const { return 0; }
};
973 
// Worst-case number of memory allocations one instance performs (see the
// member list in the comment below).
deUint32 BottomLevelAccelerationStructureKHR::getRequiredAllocationCount (void)
{
	/*
		de::MovePtr<BufferWithMemory>							m_geometryBuffer; // but only when m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR
		de::MovePtr<Allocation>									m_accelerationStructureAlloc;
		de::MovePtr<BufferWithMemory>							m_deviceScratchBuffer;
	*/
	return 3u;
}
983 
// Empty destructor; owned resources are released by their RAII members.
BottomLevelAccelerationStructureKHR::~BottomLevelAccelerationStructureKHR ()
{
}
987 
BottomLevelAccelerationStructureKHR()988 BottomLevelAccelerationStructureKHR::BottomLevelAccelerationStructureKHR ()
989 	: BottomLevelAccelerationStructure	()
990 	, m_buildType						(VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
991 	, m_createFlags						(0u)
992 	, m_createGeneric					(false)
993 	, m_creationBufferUnbounded			(false)
994 	, m_buildFlags						(0u)
995 	, m_buildWithoutGeometries			(false)
996 	, m_buildWithoutPrimitives			(false)
997 	, m_deferredOperation				(false)
998 	, m_workerThreadCount				(0)
999 	, m_useArrayOfPointers				(false)
1000 	, m_accelerationStructureBuffer		(DE_NULL)
1001 	, m_vertexBuffer					(DE_NULL)
1002 	, m_indexBuffer						(DE_NULL)
1003 	, m_deviceScratchBuffer				(DE_NULL)
1004 	, m_hostScratchBuffer				(new std::vector<deUint8>)
1005 	, m_accelerationStructureKHR		()
1006 	, m_indirectBuffer					(DE_NULL)
1007 	, m_indirectBufferOffset			(0)
1008 	, m_indirectBufferStride			(0)
1009 {
1010 }
1011 
// Selects whether the AS is built on the device or the host.
void BottomLevelAccelerationStructureKHR::setBuildType (const VkAccelerationStructureBuildTypeKHR	buildType)
{
	m_buildType = buildType;
}
1016 
// Returns the currently configured build type.
VkAccelerationStructureBuildTypeKHR BottomLevelAccelerationStructureKHR::getBuildType () const
{
	return m_buildType;
}
1021 
// Flags passed through to VkAccelerationStructureCreateInfoKHR::createFlags.
void BottomLevelAccelerationStructureKHR::setCreateFlags (const VkAccelerationStructureCreateFlagsKHR	createFlags)
{
	m_createFlags = createFlags;
}
1026 
// When set, the AS is created with VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
// instead of the bottom-level type.
void BottomLevelAccelerationStructureKHR::setCreateGeneric (bool createGeneric)
{
	m_createGeneric = createGeneric;
}
1031 
// When set, the backing buffer's memory is bound only after the AS has been
// created, instead of at buffer creation time.
void BottomLevelAccelerationStructureKHR::setCreationBufferUnbounded (bool creationBufferUnbounded)
{
	m_creationBufferUnbounded = creationBufferUnbounded;
}
1036 
// Flags used in VkAccelerationStructureBuildGeometryInfoKHR::flags.
void BottomLevelAccelerationStructureKHR::setBuildFlags (const VkBuildAccelerationStructureFlagsKHR	buildFlags)
{
	m_buildFlags = buildFlags;
}
1041 
// When set, geometryCount 0 is reported to the size query / build.
void BottomLevelAccelerationStructureKHR::setBuildWithoutGeometries (bool buildWithoutGeometries)
{
	m_buildWithoutGeometries = buildWithoutGeometries;
}
1046 
// When set, the build is performed with zero primitives per geometry.
void BottomLevelAccelerationStructureKHR::setBuildWithoutPrimitives (bool buildWithoutPrimitives)
{
	m_buildWithoutPrimitives = buildWithoutPrimitives;
}
1051 
// Enables deferred host operations for the build, optionally joined by
// workerThreadCount worker threads.
void BottomLevelAccelerationStructureKHR::setDeferredOperation (const bool		deferredOperation,
																const deUint32	workerThreadCount)
{
	m_deferredOperation = deferredOperation;
	m_workerThreadCount = workerThreadCount;
}
1058 
// Chooses ppGeometries (array of pointers) over pGeometries in the build info.
void BottomLevelAccelerationStructureKHR::setUseArrayOfPointers (const bool	useArrayOfPointers)
{
	m_useArrayOfPointers = useArrayOfPointers;
}
1063 
// When set, buffer usage is specified via VkBufferUsageFlags2CreateInfoKHR
// (VK_KHR_maintenance5) instead of VkBufferCreateInfo::usage.
void BottomLevelAccelerationStructureKHR::setUseMaintenance5(const bool	useMaintenance5)
{
	m_useMaintenance5 = useMaintenance5;
}
1068 
// Stores the buffer/offset/stride used for indirect builds. The buffer handle
// is not owned by this object.
void BottomLevelAccelerationStructureKHR::setIndirectBuildParameters (const VkBuffer		indirectBuffer,
																	  const VkDeviceSize	indirectBufferOffset,
																	  const deUint32		indirectBufferStride)
{
	m_indirectBuffer		= indirectBuffer;
	m_indirectBufferOffset	= indirectBufferOffset;
	m_indirectBufferStride	= indirectBufferStride;
}
1077 
// Returns the currently configured build flags.
VkBuildAccelerationStructureFlagsKHR BottomLevelAccelerationStructureKHR::getBuildFlags () const
{
	return m_buildFlags;
}
1082 
// Creates the VkAccelerationStructureKHR plus its backing buffer, scratch
// memory, and (for device builds) the vertex/index input buffers. Exactly one
// of "geometries were added" / "structureSize != 0" must hold: with geometries
// the required sizes are queried from the driver, otherwise the caller-given
// size is used (copy/compaction/deserialization path).
void BottomLevelAccelerationStructureKHR::create (const DeviceInterface&				vk,
												  const VkDevice						device,
												  Allocator&							allocator,
												  VkDeviceSize							structureSize,
												  VkDeviceAddress						deviceAddress,
												  const void*							pNext,
												  const MemoryRequirement&				addMemoryRequirement,
												  const VkBuffer						creationBuffer,
												  const VkDeviceSize					creationBufferSize)
{
	// AS may be built from geometries using vkCmdBuildAccelerationStructuresKHR / vkBuildAccelerationStructuresKHR
	// or may be copied/compacted/deserialized from other AS ( in this case AS does not need geometries, but it needs to know its size before creation ).
	DE_ASSERT(!m_geometriesData.empty() !=  !(structureSize == 0)); // logical xor

	if (structureSize == 0)
	{
		// Query required AS and scratch sizes for the current geometry set.
		std::vector<VkAccelerationStructureGeometryKHR>			accelerationStructureGeometriesKHR;
		std::vector<VkAccelerationStructureGeometryKHR*>		accelerationStructureGeometriesKHRPointers;
		std::vector<VkAccelerationStructureBuildRangeInfoKHR>	accelerationStructureBuildRangeInfoKHR;
		std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT> accelerationStructureGeometryMicromapsEXT;
		std::vector<deUint32>									maxPrimitiveCounts;
		prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers, accelerationStructureBuildRangeInfoKHR, accelerationStructureGeometryMicromapsEXT, maxPrimitiveCounts);

		const VkAccelerationStructureGeometryKHR*				accelerationStructureGeometriesKHRPointer	= accelerationStructureGeometriesKHR.data();
		const VkAccelerationStructureGeometryKHR* const*		accelerationStructureGeometry				= accelerationStructureGeometriesKHRPointers.data();

		// When testing builds without geometries, report zero geometries to the size query too.
		const deUint32											geometryCount								= (m_buildWithoutGeometries
																											? 0u
																											: static_cast<deUint32>(accelerationStructureGeometriesKHR.size()));
		VkAccelerationStructureBuildGeometryInfoKHR	accelerationStructureBuildGeometryInfoKHR	=
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,			//  VkStructureType										sType;
			DE_NULL,																	//  const void*											pNext;
			VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,							//  VkAccelerationStructureTypeKHR						type;
			m_buildFlags,																//  VkBuildAccelerationStructureFlagsKHR				flags;
			VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,								//  VkBuildAccelerationStructureModeKHR					mode;
			DE_NULL,																	//  VkAccelerationStructureKHR							srcAccelerationStructure;
			DE_NULL,																	//  VkAccelerationStructureKHR							dstAccelerationStructure;
			geometryCount,																//  deUint32											geometryCount;
			m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer,	//  const VkAccelerationStructureGeometryKHR*			pGeometries;
			m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL,				//  const VkAccelerationStructureGeometryKHR* const*	ppGeometries;
			makeDeviceOrHostAddressKHR(DE_NULL)											//  VkDeviceOrHostAddressKHR							scratchData;
		};
		VkAccelerationStructureBuildSizesInfoKHR sizeInfo =
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,	//  VkStructureType	sType;
			DE_NULL,														//  const void*		pNext;
			0,																//  VkDeviceSize	accelerationStructureSize;
			0,																//  VkDeviceSize	updateScratchSize;
			0																//  VkDeviceSize	buildScratchSize;
		};

		vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);

		m_structureSize		= sizeInfo.accelerationStructureSize;
		m_updateScratchSize	= sizeInfo.updateScratchSize;
		m_buildScratchSize	= sizeInfo.buildScratchSize;
	}
	else
	{
		// Size supplied by the caller: no scratch memory is needed.
		m_structureSize		= structureSize;
		m_updateScratchSize	= 0u;
		m_buildScratchSize	= 0u;
	}

	// The caller may supply an externally managed buffer to place the AS in.
	const bool externalCreationBuffer = (creationBuffer != VK_NULL_HANDLE);

	if (externalCreationBuffer)
	{
		DE_UNREF(creationBufferSize); // For release builds.
		DE_ASSERT(creationBufferSize >= m_structureSize);
	}

	if (!externalCreationBuffer)
	{
		VkBufferCreateInfo					bufferCreateInfo	= makeBufferCreateInfo(m_structureSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
		VkBufferUsageFlags2CreateInfoKHR	bufferUsageFlags2	= vk::initVulkanStructure();

		// With maintenance5, specify usage via the flags2 chain instead of the legacy field.
		if (m_useMaintenance5)
		{
			bufferUsageFlags2.usage = VK_BUFFER_USAGE_2_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_2_SHADER_DEVICE_ADDRESS_BIT_KHR;
			bufferCreateInfo.pNext = &bufferUsageFlags2;
			bufferCreateInfo.usage = 0;
		}

		const MemoryRequirement			memoryRequirement		= addMemoryRequirement | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress;
		const bool						bindMemOnCreation		= (!m_creationBufferUnbounded);

		try
		{
			// Prefer cached memory when available.
			m_accelerationStructureBuffer	= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, (MemoryRequirement::Cached | memoryRequirement), bindMemOnCreation));
		}
		catch (const tcu::NotSupportedError&)
		{
			// retry without Cached flag
			m_accelerationStructureBuffer	= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, memoryRequirement, bindMemOnCreation));
		}
	}

	const auto createInfoBuffer = (externalCreationBuffer ? creationBuffer : getAccelerationStructureBuffer()->get());
	const auto createInfoOffset = (externalCreationBuffer ? static_cast<VkDeviceSize>(0) : getAccelerationStructureBufferOffset());
	{
		const VkAccelerationStructureTypeKHR		structureType						= (m_createGeneric
																						   ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
																						   : VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
		const VkAccelerationStructureCreateInfoKHR	accelerationStructureCreateInfoKHR
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,						//  VkStructureType											sType;
			pNext,																			//  const void*												pNext;
			m_createFlags,																	//  VkAccelerationStructureCreateFlagsKHR					createFlags;
			createInfoBuffer,																//  VkBuffer												buffer;
			createInfoOffset,																//  VkDeviceSize											offset;
			m_structureSize,																//  VkDeviceSize											size;
			structureType,																	//  VkAccelerationStructureTypeKHR							type;
			deviceAddress																	//  VkDeviceAddress											deviceAddress;
		};

		m_accelerationStructureKHR	= createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);

		// Make sure buffer memory is always bound after creation.
		if (!externalCreationBuffer)
			m_accelerationStructureBuffer->bindMemory();
	}

	// Scratch memory: a device buffer for device builds, a host vector otherwise.
	if (m_buildScratchSize > 0u)
	{
		if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
		{
			const VkBufferCreateInfo		bufferCreateInfo = makeBufferCreateInfo(m_buildScratchSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
			m_deviceScratchBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
		}
		else
		{
			m_hostScratchBuffer->resize(static_cast<size_t>(m_buildScratchSize));
		}
	}

	// Device builds read vertex/index input from device buffers; allocate them here.
	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR && !m_geometriesData.empty())
	{
		VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(getVertexBufferSize(m_geometriesData), VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
		VkBufferUsageFlags2CreateInfoKHR bufferUsageFlags2 = vk::initVulkanStructure();

		if (m_useMaintenance5)
		{
			bufferUsageFlags2.usage = vk::VK_BUFFER_USAGE_2_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_2_SHADER_DEVICE_ADDRESS_BIT_KHR;
			bufferCreateInfo.pNext = &bufferUsageFlags2;
			bufferCreateInfo.usage = 0;
		}

		const vk::MemoryRequirement memoryRequirement = MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress;
		m_vertexBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, memoryRequirement));

		// An index buffer is only needed when some geometry uses indices.
		bufferCreateInfo.size = getIndexBufferSize(m_geometriesData);
		if (bufferCreateInfo.size)
			m_indexBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, memoryRequirement));
		else
			m_indexBuffer = de::MovePtr<BufferWithMemory>(nullptr);
	}
}
1242 
// Records (device builds) or performs (host builds) the actual BLAS build.
// When srcAccelerationStructure is non-null the build runs in update mode
// (VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR), otherwise a full build
// is done. Requires that create() has already been called (valid handle and
// non-zero scratch size).
void BottomLevelAccelerationStructureKHR::build (const DeviceInterface&						vk,
												 const VkDevice								device,
												 const VkCommandBuffer						cmdBuffer,
												 BottomLevelAccelerationStructure*          srcAccelerationStructure)
{
	DE_ASSERT(!m_geometriesData.empty());
	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
	DE_ASSERT(m_buildScratchSize != 0);

	// Device builds read geometry from device-visible buffers, so upload the
	// vertex (and optional index) data first; host builds read it directly.
	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		updateVertexBuffer(vk, device, m_geometriesData,  getVertexBuffer(), getVertexBufferOffset());
		if(getIndexBuffer() != DE_NULL)
			updateIndexBuffer(vk, device, m_geometriesData, getIndexBuffer(), getIndexBufferOffset());
	}

	{
		std::vector<VkAccelerationStructureGeometryKHR>			accelerationStructureGeometriesKHR;
		std::vector<VkAccelerationStructureGeometryKHR*>		accelerationStructureGeometriesKHRPointers;
		std::vector<VkAccelerationStructureBuildRangeInfoKHR>	accelerationStructureBuildRangeInfoKHR;
		std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT> accelerationStructureGeometryMicromapsEXT;
		std::vector<deUint32>									maxPrimitiveCounts;

		prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers,
						  accelerationStructureBuildRangeInfoKHR, accelerationStructureGeometryMicromapsEXT, maxPrimitiveCounts, getVertexBufferOffset(), getIndexBufferOffset());

		const VkAccelerationStructureGeometryKHR*			accelerationStructureGeometriesKHRPointer	= accelerationStructureGeometriesKHR.data();
		const VkAccelerationStructureGeometryKHR* const*	accelerationStructureGeometry				= accelerationStructureGeometriesKHRPointers.data();
		// Scratch memory lives in a device buffer for device builds and in a
		// plain host vector for host builds.
		VkDeviceOrHostAddressKHR							scratchData									= (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
																										? makeDeviceOrHostAddressKHR(vk, device, getDeviceScratchBuffer()->get(), getDeviceScratchBufferOffset())
																										: makeDeviceOrHostAddressKHR(getHostScratchBuffer()->data());
		// Some tests deliberately issue the build with geometryCount == 0.
		const deUint32										geometryCount								= (m_buildWithoutGeometries
																										? 0u
																										: static_cast<deUint32>(accelerationStructureGeometriesKHR.size()));

		VkAccelerationStructureKHR				srcStructure									= (srcAccelerationStructure != DE_NULL) ? *(srcAccelerationStructure->getPtr()) : DE_NULL;
		VkBuildAccelerationStructureModeKHR		mode											= (srcAccelerationStructure != DE_NULL) ? VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR : VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;

		// Exactly one of pGeometries / ppGeometries is used, selected by
		// m_useArrayOfPointers; the other must be null.
		VkAccelerationStructureBuildGeometryInfoKHR	accelerationStructureBuildGeometryInfoKHR	=
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,			//  VkStructureType										sType;
			DE_NULL,																	//  const void*											pNext;
			VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,							//  VkAccelerationStructureTypeKHR						type;
			m_buildFlags,																//  VkBuildAccelerationStructureFlagsKHR				flags;
			mode,								//  VkBuildAccelerationStructureModeKHR					mode;
			srcStructure,																	//  VkAccelerationStructureKHR							srcAccelerationStructure;
			m_accelerationStructureKHR.get(),											//  VkAccelerationStructureKHR							dstAccelerationStructure;
			geometryCount,																//  deUint32											geometryCount;
			m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer,	//  const VkAccelerationStructureGeometryKHR*			pGeometries;
			m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL,				//  const VkAccelerationStructureGeometryKHR* const*	ppGeometries;
			scratchData																	//  VkDeviceOrHostAddressKHR							scratchData;
		};

		VkAccelerationStructureBuildRangeInfoKHR* accelerationStructureBuildRangeInfoKHRPtr	= accelerationStructureBuildRangeInfoKHR.data();

		if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
		{
			// Device path: direct build, or indirect build when an indirect
			// buffer with the build ranges has been supplied.
			if (m_indirectBuffer == DE_NULL)
				vk.cmdBuildAccelerationStructuresKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
			else
			{
				VkDeviceAddress	indirectDeviceAddress	= getBufferDeviceAddress(vk, device, m_indirectBuffer, m_indirectBufferOffset);
				deUint32*		pMaxPrimitiveCounts		= maxPrimitiveCounts.data();
				vk.cmdBuildAccelerationStructuresIndirectKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, &indirectDeviceAddress, &m_indirectBufferStride, &pMaxPrimitiveCounts);
			}
		}
		else if (!m_deferredOperation)
		{
			// Host path, immediate build.
			VK_CHECK(vk.buildAccelerationStructuresKHR(device, DE_NULL, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr));
		}
		else
		{
			// Host path, deferred build: the operation is completed through
			// finishDeferredOperation(), possibly on worker threads.
			const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
			const auto deferredOperation	= deferredOperationPtr.get();

			VkResult result = vk.buildAccelerationStructuresKHR(device, deferredOperation, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);

			DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);

			finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
		}
	}

	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		// Make the freshly built BLAS visible to all subsequent commands.
		const VkAccessFlags		accessMasks	= VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
		const VkMemoryBarrier	memBarrier	= makeMemoryBarrier(accessMasks, accessMasks);

		cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
	}
}
1334 
// Copies another BLAS into this one, either as a plain clone or as a
// compacting copy (compactCopy == true). This structure must already have
// been created with a size large enough for the copy.
void BottomLevelAccelerationStructureKHR::copyFrom (const DeviceInterface&						vk,
													const VkDevice								device,
													const VkCommandBuffer						cmdBuffer,
													BottomLevelAccelerationStructure*			accelerationStructure,
													bool										compactCopy)
{
	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
	DE_ASSERT(accelerationStructure != DE_NULL);

	VkCopyAccelerationStructureInfoKHR copyAccelerationStructureInfo =
	{
		VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR,															// VkStructureType						sType;
		DE_NULL,																										// const void*							pNext;
		*(accelerationStructure->getPtr()),																				// VkAccelerationStructureKHR			src;
		*(getPtr()),																									// VkAccelerationStructureKHR			dst;
		compactCopy ? VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR : VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR	// VkCopyAccelerationStructureModeKHR	mode;
	};

	// Device copies are recorded into the command buffer; host copies run
	// immediately, optionally through a deferred operation.
	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		vk.cmdCopyAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
	}
	else if (!m_deferredOperation)
	{
		VK_CHECK(vk.copyAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
	}
	else
	{
		const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
		const auto deferredOperation	= deferredOperationPtr.get();

		VkResult result = vk.copyAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);

		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);

		finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
	}

	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		// Make the copied structure visible to all subsequent commands.
		const VkAccessFlags		accessMasks	= VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
		const VkMemoryBarrier	memBarrier	= makeMemoryBarrier(accessMasks, accessMasks);

		cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
	}
}
1381 
// Serializes this BLAS into the caller-provided storage using
// VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR.
void BottomLevelAccelerationStructureKHR::serialize (const DeviceInterface&		vk,
													 const VkDevice				device,
													 const VkCommandBuffer		cmdBuffer,
													 SerialStorage*				storage)
{
	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
	DE_ASSERT(storage != DE_NULL);

	const VkCopyAccelerationStructureToMemoryInfoKHR	copyAccelerationStructureInfo	=
	{
		VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_INFO_KHR,	// VkStructureType						sType;
		DE_NULL,															// const void*							pNext;
		*(getPtr()),														// VkAccelerationStructureKHR			src;
		storage->getAddress(vk, device, m_buildType),						// VkDeviceOrHostAddressKHR				dst;
		VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR					// VkCopyAccelerationStructureModeKHR	mode;
	};

	// Device path records the copy; host path executes it now, optionally
	// through a deferred operation.
	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		vk.cmdCopyAccelerationStructureToMemoryKHR(cmdBuffer, &copyAccelerationStructureInfo);
	}
	else if (!m_deferredOperation)
	{
		VK_CHECK(vk.copyAccelerationStructureToMemoryKHR(device, DE_NULL, &copyAccelerationStructureInfo));
	}
	else
	{
		const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
		const auto deferredOperation	= deferredOperationPtr.get();

		const VkResult result = vk.copyAccelerationStructureToMemoryKHR(device, deferredOperation, &copyAccelerationStructureInfo);

		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);

		finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
	}
}
1419 
// Restores this BLAS from previously serialized data using
// VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR. The structure must
// already have been created with the deserialized size.
void BottomLevelAccelerationStructureKHR::deserialize (const DeviceInterface&	vk,
													   const VkDevice			device,
													   const VkCommandBuffer	cmdBuffer,
													   SerialStorage*			storage)
{
	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
	DE_ASSERT(storage != DE_NULL);

	const VkCopyMemoryToAccelerationStructureInfoKHR	copyAccelerationStructureInfo	=
	{
		VK_STRUCTURE_TYPE_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_INFO_KHR,	// VkStructureType							sType;
		DE_NULL,															// const void*								pNext;
		storage->getAddressConst(vk, device, m_buildType),					// VkDeviceOrHostAddressConstKHR			src;
		*(getPtr()),														// VkAccelerationStructureKHR				dst;
		VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR					// VkCopyAccelerationStructureModeKHR		mode;
	};

	// Device path records the copy; host path executes it now, optionally
	// through a deferred operation.
	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		vk.cmdCopyMemoryToAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
	}
	else if (!m_deferredOperation)
	{
		VK_CHECK(vk.copyMemoryToAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
	}
	else
	{
		const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
		const auto deferredOperation	= deferredOperationPtr.get();

		const VkResult result = vk.copyMemoryToAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);

		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);

		finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
	}

	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		// Make the deserialized structure visible to all subsequent commands.
		const VkAccessFlags		accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
		const VkMemoryBarrier	memBarrier = makeMemoryBarrier(accessMasks, accessMasks);

		cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
	}
}
1465 
getPtr(void) const1466 const VkAccelerationStructureKHR* BottomLevelAccelerationStructureKHR::getPtr (void) const
1467 {
1468 	return &m_accelerationStructureKHR.get();
1469 }
1470 
// Fills the per-geometry arrays consumed by the acceleration structure
// build commands: one VkAccelerationStructureGeometryKHR, one build-range
// info and one max-primitive count per element of m_geometriesData.
// vertexBufferOffset/indexBufferOffset are advanced per geometry so that
// consecutive geometries land at 8-byte-aligned offsets within the shared
// vertex/index buffers (device builds only).
void BottomLevelAccelerationStructureKHR::prepareGeometries (const DeviceInterface&												vk,
															 const VkDevice														device,
															 std::vector<VkAccelerationStructureGeometryKHR>&					accelerationStructureGeometriesKHR,
															 std::vector<VkAccelerationStructureGeometryKHR*>&					accelerationStructureGeometriesKHRPointers,
															 std::vector<VkAccelerationStructureBuildRangeInfoKHR>&				accelerationStructureBuildRangeInfoKHR,
															 std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT>&	accelerationStructureGeometryMicromapsEXT,
															 std::vector<deUint32>&												maxPrimitiveCounts,
															 VkDeviceSize														vertexBufferOffset,
															 VkDeviceSize														indexBufferOffset) const
{
	accelerationStructureGeometriesKHR.resize(m_geometriesData.size());
	accelerationStructureGeometriesKHRPointers.resize(m_geometriesData.size());
	accelerationStructureBuildRangeInfoKHR.resize(m_geometriesData.size());
	accelerationStructureGeometryMicromapsEXT.resize(m_geometriesData.size());
	maxPrimitiveCounts.resize(m_geometriesData.size());

	for (size_t geometryNdx = 0; geometryNdx < m_geometriesData.size(); ++geometryNdx)
	{
		const de::SharedPtr<RaytracedGeometryBase>&				geometryData = m_geometriesData[geometryNdx];
		VkDeviceOrHostAddressConstKHR							vertexData, indexData;
		if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
		{
			// Device build: addresses point into the shared device buffers.
			if (getVertexBuffer() != DE_NULL)
			{
				vertexData			= makeDeviceOrHostAddressConstKHR(vk, device, getVertexBuffer()->get(), vertexBufferOffset);
				// The vertex offset only advances for non-indirect builds.
				if (m_indirectBuffer == DE_NULL )
				{
					vertexBufferOffset	+= deAlignSize(geometryData->getVertexByteSize(), 8);
				}
			}
			else
				vertexData			= makeDeviceOrHostAddressConstKHR(DE_NULL);

			if (getIndexBuffer() != DE_NULL &&  geometryData->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
			{
				indexData			= makeDeviceOrHostAddressConstKHR(vk, device, getIndexBuffer()->get(), indexBufferOffset);
				indexBufferOffset	+= deAlignSize(geometryData->getIndexByteSize(), 8);
			}
			else
				indexData = makeDeviceOrHostAddressConstKHR(DE_NULL);
		}
		else
		{
			// Host build: addresses point directly at host-side geometry data.
			vertexData = makeDeviceOrHostAddressConstKHR(geometryData->getVertexPointer());
			if (geometryData->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
				indexData = makeDeviceOrHostAddressConstKHR(geometryData->getIndexPointer());
			else
				indexData = makeDeviceOrHostAddressConstKHR(DE_NULL);
		}

		VkAccelerationStructureGeometryTrianglesDataKHR	accelerationStructureGeometryTrianglesDataKHR =
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR,	//  VkStructureType					sType;
			DE_NULL,																//  const void*						pNext;
			geometryData->getVertexFormat(),										//  VkFormat						vertexFormat;
			vertexData,																//  VkDeviceOrHostAddressConstKHR	vertexData;
			geometryData->getVertexStride(),										//  VkDeviceSize					vertexStride;
			static_cast<deUint32>(geometryData->getVertexCount()),					//  uint32_t						maxVertex;
			geometryData->getIndexType(),											//  VkIndexType						indexType;
			indexData,																//  VkDeviceOrHostAddressConstKHR	indexData;
			makeDeviceOrHostAddressConstKHR(DE_NULL),								//  VkDeviceOrHostAddressConstKHR	transformData;
		};

		// Chain the opacity micromap description into pNext when present.
		if (geometryData->getHasOpacityMicromap())
			accelerationStructureGeometryTrianglesDataKHR.pNext = &geometryData->getOpacityMicromap();

		const VkAccelerationStructureGeometryAabbsDataKHR		accelerationStructureGeometryAabbsDataKHR =
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR,	//  VkStructureType					sType;
			DE_NULL,															//  const void*						pNext;
			vertexData,															//  VkDeviceOrHostAddressConstKHR	data;
			geometryData->getAABBStride()										//  VkDeviceSize					stride;
		};
		// Select the triangles or AABBs union member to match the geometry type.
		const VkAccelerationStructureGeometryDataKHR			geometry = (geometryData->isTrianglesType())
																		 ? makeVkAccelerationStructureGeometryDataKHR(accelerationStructureGeometryTrianglesDataKHR)
																		 : makeVkAccelerationStructureGeometryDataKHR(accelerationStructureGeometryAabbsDataKHR);
		const VkAccelerationStructureGeometryKHR				accelerationStructureGeometryKHR =
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,	//  VkStructureType							sType;
			DE_NULL,												//  const void*								pNext;
			geometryData->getGeometryType(),						//  VkGeometryTypeKHR						geometryType;
			geometry,												//  VkAccelerationStructureGeometryDataKHR	geometry;
			geometryData->getGeometryFlags()						//  VkGeometryFlagsKHR						flags;
		};

		// Some tests deliberately build with zero primitives.
		const deUint32 primitiveCount = (m_buildWithoutPrimitives ? 0u : geometryData->getPrimitiveCount());

		const VkAccelerationStructureBuildRangeInfoKHR			accelerationStructureBuildRangeInfosKHR =
		{
			primitiveCount,	//  deUint32	primitiveCount;
			0,				//  deUint32	primitiveOffset;
			0,				//  deUint32	firstVertex;
			0				//  deUint32	firstTransform;
		};

		accelerationStructureGeometriesKHR[geometryNdx]			= accelerationStructureGeometryKHR;
		accelerationStructureGeometriesKHRPointers[geometryNdx]	= &accelerationStructureGeometriesKHR[geometryNdx];
		accelerationStructureBuildRangeInfoKHR[geometryNdx]		= accelerationStructureBuildRangeInfosKHR;
		// maxPrimitiveCounts always carries the real primitive count, even
		// when the build itself uses zero primitives.
		maxPrimitiveCounts[geometryNdx]							= geometryData->getPrimitiveCount();
	}
}
1572 
getRequiredAllocationCount(void)1573 deUint32 BottomLevelAccelerationStructure::getRequiredAllocationCount (void)
1574 {
1575 	return BottomLevelAccelerationStructureKHR::getRequiredAllocationCount();
1576 }
1577 
// Convenience helper: creates the acceleration structure and immediately
// records/performs its build.
void BottomLevelAccelerationStructure::createAndBuild (const DeviceInterface&	vk,
													   const VkDevice			device,
													   const VkCommandBuffer	cmdBuffer,
													   Allocator&				allocator,
													   VkDeviceAddress			deviceAddress)
{
	// 0u structure size: create() is expected to derive the size from the
	// geometry data set earlier -- see create() for details.
	create(vk, device, allocator, 0u, deviceAddress);
	build(vk, device, cmdBuffer);
}
1587 
createAndCopyFrom(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,Allocator & allocator,BottomLevelAccelerationStructure * accelerationStructure,VkDeviceSize compactCopySize,VkDeviceAddress deviceAddress)1588 void BottomLevelAccelerationStructure::createAndCopyFrom (const DeviceInterface&				vk,
1589 														  const VkDevice						device,
1590 														  const VkCommandBuffer					cmdBuffer,
1591 														  Allocator&							allocator,
1592 														  BottomLevelAccelerationStructure*		accelerationStructure,
1593 														  VkDeviceSize							compactCopySize,
1594 														  VkDeviceAddress						deviceAddress)
1595 {
1596 	DE_ASSERT(accelerationStructure != NULL);
1597 	VkDeviceSize copiedSize = compactCopySize > 0u ? compactCopySize : accelerationStructure->getStructureBuildSizes().accelerationStructureSize;
1598 	DE_ASSERT(copiedSize != 0u);
1599 
1600 	create(vk, device, allocator, copiedSize, deviceAddress);
1601 	copyFrom(vk, device, cmdBuffer, accelerationStructure, compactCopySize > 0u);
1602 }
1603 
createAndDeserializeFrom(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,Allocator & allocator,SerialStorage * storage,VkDeviceAddress deviceAddress)1604 void BottomLevelAccelerationStructure::createAndDeserializeFrom (const DeviceInterface& vk,
1605 																 const VkDevice								device,
1606 																 const VkCommandBuffer						cmdBuffer,
1607 																 Allocator&									allocator,
1608 																 SerialStorage*								storage,
1609 																 VkDeviceAddress							deviceAddress )
1610 {
1611 	DE_ASSERT(storage != NULL);
1612 	DE_ASSERT(storage->getStorageSize() >= SerialStorage::SERIAL_STORAGE_SIZE_MIN);
1613 	create(vk, device, allocator, storage->getDeserializedSize(), deviceAddress);
1614 	deserialize(vk, device, cmdBuffer, storage);
1615 }
1616 
// Replaces the geometry stored at geometryIndex with the given one.
void BottomLevelAccelerationStructureKHR::updateGeometry (size_t									geometryIndex,
														  de::SharedPtr<RaytracedGeometryBase>&	raytracedGeometry)
{
	DE_ASSERT(geometryIndex < m_geometriesData.size());
	m_geometriesData[geometryIndex] = raytracedGeometry;
}
1623 
makeBottomLevelAccelerationStructure()1624 de::MovePtr<BottomLevelAccelerationStructure> makeBottomLevelAccelerationStructure ()
1625 {
1626 	return de::MovePtr<BottomLevelAccelerationStructure>(new BottomLevelAccelerationStructureKHR);
1627 }
1628 
1629 // Forward declaration
1630 struct BottomLevelAccelerationStructurePoolImpl;
1631 
// Pool-backed BLAS implementation: instead of owning its buffers it records
// indices and offsets (Info) into shared buffers held by the owning pool,
// and the buffer accessor overrides resolve them against that pool.
class BottomLevelAccelerationStructurePoolMember : public BottomLevelAccelerationStructureKHR
{
public:
	friend class BottomLevelAccelerationStructurePool;

								BottomLevelAccelerationStructurePoolMember	(BottomLevelAccelerationStructurePoolImpl& pool);
								BottomLevelAccelerationStructurePoolMember	(const BottomLevelAccelerationStructurePoolMember&) = delete;
								BottomLevelAccelerationStructurePoolMember	(BottomLevelAccelerationStructurePoolMember&&) = delete;
	virtual						~BottomLevelAccelerationStructurePoolMember	() = default;

	// Pool members are created through the pool, so standalone creation is
	// disallowed at runtime (asserts when called).
	virtual void				create										(const DeviceInterface&,
																			 const VkDevice,
																			 Allocator&,
																			 VkDeviceSize,
																			 VkDeviceAddress,
																			 const void*,
																			 const MemoryRequirement&,
																			 const VkBuffer,
																			 const VkDeviceSize) override
								{
									DE_ASSERT(0); // Silent this method
								}
	virtual auto				computeBuildSize							(const DeviceInterface&	vk,
																			 const VkDevice			device,
																			 const VkDeviceSize		strSize) const
																			 //              accStrSize,updateScratch, buildScratch, vertexSize,   indexSize
																			 -> std::tuple<VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize>;
protected:
	struct Info;
	virtual void				preCreateSetSizesAndOffsets					(const Info&			info,
																			 const VkDeviceSize		accStrSize,
																			 const VkDeviceSize		updateScratchSize,
																			 const VkDeviceSize		buildScratchSize);
	virtual void				createAccellerationStructure				(const DeviceInterface&	vk,
																			 const VkDevice			device,
																			 VkDeviceAddress		deviceAddress);

	// Buffer accessors resolve this member's indices against the pool's
	// shared buffers (returning null when the pool has none).
	virtual BufferWithMemory*	getAccelerationStructureBuffer				() const override;
	virtual BufferWithMemory*	getDeviceScratchBuffer						() const override;
	virtual std::vector<deUint8>*	getHostScratchBuffer					() const override;
	virtual BufferWithMemory*	getVertexBuffer								() const override;
	virtual BufferWithMemory*	getIndexBuffer								() const override;

	virtual VkDeviceSize		getAccelerationStructureBufferOffset		() const override { return m_info.accStrOffset; }
	virtual VkDeviceSize		getDeviceScratchBufferOffset				() const override { return m_info.buildScratchBuffOffset; }
	virtual VkDeviceSize		getVertexBufferOffset						() const override { return m_info.vertBuffOffset; }
	virtual VkDeviceSize		getIndexBufferOffset						() const override { return m_info.indexBuffOffset; }

	BottomLevelAccelerationStructurePoolImpl&	m_pool;

	// Placement of this structure inside the pool's shared buffers. The
	// accessors assert the indices are not the all-bits-set sentinel (see
	// isnegz), which presumably marks "not assigned" -- confirm in the pool
	// batch-creation code.
	struct Info
	{
		deUint32				accStrIndex;
		VkDeviceSize			accStrOffset;
		deUint32				vertBuffIndex;
		VkDeviceSize			vertBuffOffset;
		deUint32				indexBuffIndex;
		VkDeviceSize			indexBuffOffset;
		deUint32				buildScratchBuffIndex;
		VkDeviceSize			buildScratchBuffOffset;
	}											m_info;
};
1694 
// All-bits-set value of X, used as a "no value" sentinel.
template<class X> inline X negz (const X&)
{
	const X zeroBits = static_cast<X>(0);
	return static_cast<X>(~zeroBits);
}
// True when x holds the all-bits-set sentinel produced by negz().
template<class X> inline bool isnegz (const X& x)
{
	return negz(x) == x;
}
// Converts y to the unsigned counterpart of its own type, preserving the
// bit pattern (modular conversion for negative values).
template<class Y> inline auto make_unsigned(const Y& y) -> typename std::make_unsigned<Y>::type
{
	using UnsignedY = typename std::make_unsigned<Y>::type;
	return static_cast<UnsignedY>(y);
}
1707 
// A freshly added pool member starts with a zero-initialized Info; its
// buffer indices and offsets are filled in later (see
// preCreateSetSizesAndOffsets).
BottomLevelAccelerationStructurePoolMember::BottomLevelAccelerationStructurePoolMember	(BottomLevelAccelerationStructurePoolImpl& pool)
	: m_pool	(pool)
	, m_info	{}
{
}
1713 
// Shared storage backing all members of a BottomLevelAccelerationStructurePool;
// the pool members only keep indices/offsets into these buffers.
struct BottomLevelAccelerationStructurePoolImpl
{
	BottomLevelAccelerationStructurePoolImpl (BottomLevelAccelerationStructurePoolImpl&&) = delete;
	BottomLevelAccelerationStructurePoolImpl (const BottomLevelAccelerationStructurePoolImpl&) = delete;
	BottomLevelAccelerationStructurePoolImpl (BottomLevelAccelerationStructurePool& pool);

	BottomLevelAccelerationStructurePool&			m_pool;							// back-reference to the owning pool
	std::vector<de::SharedPtr<BufferWithMemory>>	m_accellerationStructureBuffers;
	de::SharedPtr<BufferWithMemory>					m_deviceScratchBuffer;			// single scratch buffer shared by all members
	de::UniquePtr<std::vector<deUint8>>				m_hostScratchBuffer;			// scratch memory for host builds
	std::vector<de::SharedPtr<BufferWithMemory>>	m_vertexBuffers;
	std::vector<de::SharedPtr<BufferWithMemory>>	m_indexBuffers;
};
// Starts with no device buffers allocated; only the (empty) host scratch
// vector exists from the beginning.
BottomLevelAccelerationStructurePoolImpl::BottomLevelAccelerationStructurePoolImpl (BottomLevelAccelerationStructurePool& pool)
	: m_pool							(pool)
	, m_accellerationStructureBuffers	()
	, m_deviceScratchBuffer				()
	, m_hostScratchBuffer				(new std::vector<deUint8>)
	, m_vertexBuffers					()
	, m_indexBuffers					()
{
}
getAccelerationStructureBuffer() const1736 BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getAccelerationStructureBuffer () const
1737 {
1738 	BufferWithMemory* result = nullptr;
1739 	if (m_pool.m_accellerationStructureBuffers.size())
1740 	{
1741 		DE_ASSERT(!isnegz(m_info.accStrIndex));
1742 		result = m_pool.m_accellerationStructureBuffers[m_info.accStrIndex].get();
1743 	}
1744 	return result;
1745 }
// All pool members share a single device scratch buffer, so the only valid
// scratch index is 0.
BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getDeviceScratchBuffer () const
{
	DE_ASSERT(m_info.buildScratchBuffIndex == 0);
	return m_pool.m_deviceScratchBuffer.get();
}
getHostScratchBuffer() const1751 std::vector<deUint8>* BottomLevelAccelerationStructurePoolMember::getHostScratchBuffer () const
1752 {
1753 	return this->m_buildScratchSize ? m_pool.m_hostScratchBuffer.get() : nullptr;
1754 }
1755 
getVertexBuffer() const1756 BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getVertexBuffer () const
1757 {
1758 	BufferWithMemory* result = nullptr;
1759 	if (m_pool.m_vertexBuffers.size())
1760 	{
1761 		DE_ASSERT(!isnegz(m_info.vertBuffIndex));
1762 		result = m_pool.m_vertexBuffers[m_info.vertBuffIndex].get();
1763 	}
1764 	return result;
1765 }
getIndexBuffer() const1766 BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getIndexBuffer () const
1767 {
1768 	BufferWithMemory* result = nullptr;
1769 	if (m_pool.m_indexBuffers.size())
1770 	{
1771 		DE_ASSERT(!isnegz(m_info.indexBuffIndex));
1772 		result = m_pool.m_indexBuffers[m_info.indexBuffIndex].get();
1773 	}
1774 	return result;
1775 }
1776 
// Concrete Impl type stored by the pool; adds nothing beyond granting the
// pool and its members access to the shared buffer storage.
struct BottomLevelAccelerationStructurePool::Impl : BottomLevelAccelerationStructurePoolImpl
{
	friend class BottomLevelAccelerationStructurePool;
	friend class BottomLevelAccelerationStructurePoolMember;

	Impl (BottomLevelAccelerationStructurePool& pool)
		: BottomLevelAccelerationStructurePoolImpl(pool) { }
};
1785 
// Creates an empty pool. Default batch size is four structures per pooled
// buffer; geometry batch size of zero means "follow the structure batch size".
BottomLevelAccelerationStructurePool::BottomLevelAccelerationStructurePool ()
	: m_batchStructCount	(4)
	, m_batchGeomCount		(0)
	, m_infos				()
	, m_structs				()
	, m_createOnce			(false)	// set by batchCreateAdjust() to reject further add()/create calls
	, m_tryCachedMemory		(true)	// prefer cached host-visible memory for structure storage
	, m_structsBuffSize		(0)
	, m_updatesScratchSize	(0)
	, m_buildsScratchSize	(0)
	, m_verticesSize		(0)
	, m_indicesSize			(0)
	, m_impl				(new Impl(*this))
{
}
1801 
// Releases the implementation object; pooled buffers are freed through the
// smart pointers it holds.
BottomLevelAccelerationStructurePool::~BottomLevelAccelerationStructurePool()
{
	delete m_impl;
}
1806 
batchStructCount(const deUint32 & value)1807 void BottomLevelAccelerationStructurePool::batchStructCount (const deUint32& value)
1808 {
1809 	DE_ASSERT(value >= 1); m_batchStructCount = value;
1810 }
1811 
add(VkDeviceSize structureSize,VkDeviceAddress deviceAddress)1812 auto BottomLevelAccelerationStructurePool::add (VkDeviceSize		structureSize,
1813 												VkDeviceAddress		deviceAddress) -> BottomLevelAccelerationStructurePool::BlasPtr
1814 {
1815 	// Prevent a programmer from calling this method after batchCreate(...) method has been called.
1816 	if (m_createOnce) DE_ASSERT(0);
1817 
1818 	auto blas = new BottomLevelAccelerationStructurePoolMember(*m_impl);
1819 	m_infos.push_back({structureSize, deviceAddress});
1820 	m_structs.emplace_back(blas);
1821 	return m_structs.back();
1822 }
1823 
adjustBatchCount(const DeviceInterface & vkd,const VkDevice device,const std::vector<BottomLevelAccelerationStructurePool::BlasPtr> & structs,const std::vector<BottomLevelAccelerationStructurePool::BlasInfo> & infos,const VkDeviceSize maxBufferSize,deUint32 (& result)[4])1824 void adjustBatchCount (const DeviceInterface&		vkd,
1825 					   const VkDevice				device,
1826 					   const std::vector<BottomLevelAccelerationStructurePool::BlasPtr>& structs,
1827 					   const std::vector<BottomLevelAccelerationStructurePool::BlasInfo>& infos,
1828 					   const VkDeviceSize			maxBufferSize,
1829 					   deUint32						(&result)[4])
1830 {
1831 	tcu::Vector<VkDeviceSize, 4>	sizes(0);
1832 	tcu::Vector<VkDeviceSize, 4>	sums(0);
1833 	tcu::Vector<deUint32, 4>		tmps(0);
1834 	tcu::Vector<deUint32, 4>		batches(0);
1835 
1836 	VkDeviceSize	updateScratchSize = 0;	static_cast<void>(updateScratchSize);	// not used yet, disabled for future implementation
1837 
1838 	auto updateIf = [&](deUint32 c)
1839 	{
1840 		if (sums[c] + sizes[c] <= maxBufferSize)
1841 		{
1842 			sums[c] += sizes[c];
1843 			tmps[c] += 1;
1844 
1845 			batches[c] = std::max(tmps[c], batches[c]);
1846 		}
1847 		else
1848 		{
1849 			sums[c] = 0;
1850 			tmps[c] = 0;
1851 		}
1852 	};
1853 
1854 	const deUint32	maxIter	= static_cast<deUint32>(structs.size());
1855 	for (deUint32 i = 0; i < maxIter; ++i)
1856 	{
1857 		auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(structs[i].get());
1858 		std::tie(sizes[0], updateScratchSize, sizes[1], sizes[2], sizes[3]) = str.computeBuildSize(vkd, device, infos[i].structureSize);
1859 
1860 		updateIf(0);
1861 		updateIf(1);
1862 		updateIf(2);
1863 		updateIf(3);
1864 	}
1865 
1866 	result[0] = std::max(batches[0], 1u);
1867 	result[1] = std::max(batches[1], 1u);
1868 	result[2] = std::max(batches[2], 1u);
1869 	result[3] = std::max(batches[3], 1u);
1870 }
1871 
getAllocationCount() const1872 size_t BottomLevelAccelerationStructurePool::getAllocationCount () const
1873 {
1874 	return m_impl->m_accellerationStructureBuffers.size()
1875 			+ m_impl->m_vertexBuffers.size()
1876 			+ m_impl->m_indexBuffers.size()
1877 			+ 1 /* for scratch buffer */;
1878 }
1879 
// Predicts how many buffer allocations batchCreateAdjust() would make for the
// current pool contents given a per-buffer size limit. It simulates the same
// batching logic (including adjustBatchCount when maxBufferSize is valid) and
// counts the distinct buffers per category.
size_t BottomLevelAccelerationStructurePool::getAllocationCount (const DeviceInterface&		vk,
																 const VkDevice				device,
																 const VkDeviceSize			maxBufferSize) const
{
	DE_ASSERT(m_structs.size() != 0);

	// Buffer index -> accumulated byte size, one map per resource category.
	std::map<deUint32, VkDeviceSize>	accStrSizes;
	std::map<deUint32, VkDeviceSize>	vertBuffSizes;
	std::map<deUint32, VkDeviceSize>	indexBuffSizes;
	std::map<deUint32, VkDeviceSize>	scratchBuffSizes;

	const deUint32	allStructsCount		= structCount();

	deUint32		batchStructCount	= m_batchStructCount;
	deUint32		batchScratchCount	= m_batchStructCount;
	deUint32		batchVertexCount	= m_batchGeomCount ? m_batchGeomCount : m_batchStructCount;
	deUint32		batchIndexCount		= batchVertexCount;

	// A valid (non-negz) maxBufferSize overrides the configured batch counts.
	if (!isnegz(maxBufferSize))
	{
		deUint32	batches[4];
		adjustBatchCount(vk, device, m_structs, m_infos, maxBufferSize, batches);
		batchStructCount	= batches[0];
		batchScratchCount	= batches[1];
		batchVertexCount	= batches[2];
		batchIndexCount		= batches[3];
	}

	// Running element counters; scratch/vertex/index only advance when the
	// structure actually uses that resource.
	deUint32		iStr				= 0;
	deUint32		iScratch			= 0;
	deUint32		iVertex				= 0;
	deUint32		iIndex				= 0;

	VkDeviceSize	strSize				= 0;
	VkDeviceSize	updateScratchSize	= 0;
	VkDeviceSize	buildScratchSize	= 0;
	VkDeviceSize	vertexSize			= 0;
	VkDeviceSize	indexSize			= 0;

	for (; iStr < allStructsCount; ++iStr)
	{
		auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[iStr].get());
		std::tie(strSize, updateScratchSize, buildScratchSize, vertexSize, indexSize) = str.computeBuildSize(vk, device, m_infos[iStr].structureSize);

		{
			// Structure storage, aligned to 256 bytes as required for
			// acceleration structure offsets.
			const VkDeviceSize	alignedStrSize	= deAlign64(strSize, 256);
			const deUint32		accStrIndex		= (iStr / batchStructCount);
			accStrSizes[accStrIndex]	+= alignedStrSize;
		}

		if (buildScratchSize != 0)
		{
			const VkDeviceSize	alignedBuilsScratchSize	= deAlign64(buildScratchSize, 256);
			const deUint32		scratchBuffIndex		= (iScratch/ batchScratchCount);
			scratchBuffSizes[scratchBuffIndex]	+= alignedBuilsScratchSize;
			iScratch							+= 1;
		}

		if (vertexSize != 0)
		{
			const VkDeviceSize	alignedVertBuffSize	= deAlign64(vertexSize, 8);
			const deUint32		vertBuffIndex		= (iVertex / batchVertexCount);
			vertBuffSizes[vertBuffIndex]	+= alignedVertBuffSize;
			iVertex							+= 1;
		}

		if (indexSize != 0)
		{
			const VkDeviceSize	alignedIndexBuffSize	= deAlign64(indexSize, 8);
			const deUint32		indexBuffIndex			= (iIndex / batchIndexCount);
			indexBuffSizes[indexBuffIndex]	+= alignedIndexBuffSize;
			iIndex							+= 1;
		}
	}

	// Each distinct map key corresponds to one buffer that would be allocated.
	return accStrSizes.size()
			+ vertBuffSizes.size()
			+ indexBuffSizes.size()
			+ scratchBuffSizes.size();
}
1960 
// Returns the total byte sizes the pool needs as a vector of
// (structure storage, build scratch, vertices, indices).
// If batchCreateAdjust() already ran, the cached totals are returned;
// otherwise each structure is queried for its build sizes.
tcu::Vector<VkDeviceSize, 4> BottomLevelAccelerationStructurePool::getAllocationSizes (const DeviceInterface&		vk,
																					   const VkDevice				device) const
{
	if (m_structsBuffSize)
	{
		return tcu::Vector<VkDeviceSize, 4>(m_structsBuffSize, m_buildsScratchSize, m_verticesSize, m_indicesSize);
	}

	VkDeviceSize strSize				= 0;
	VkDeviceSize updateScratchSize		= 0;	static_cast<void>(updateScratchSize);		// not used yet, disabled for future implementation
	VkDeviceSize buildScratchSize		= 0;
	VkDeviceSize vertexSize				= 0;
	VkDeviceSize indexSize				= 0;
	VkDeviceSize sumStrSize				= 0;
	VkDeviceSize sumUpdateScratchSize	= 0;	static_cast<void>(sumUpdateScratchSize);	// not used yet, disabled for future implementation
	VkDeviceSize sumBuildScratchSize	= 0;
	VkDeviceSize sumVertexSize			= 0;
	VkDeviceSize sumIndexSize			= 0;
	for (size_t i = 0; i < structCount(); ++i)
	{
		auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[i].get());
		std::tie(strSize, updateScratchSize, buildScratchSize, vertexSize, indexSize) = str.computeBuildSize(vk, device, m_infos[i].structureSize);
		// Structure and scratch sizes are 256-byte aligned, geometry 8-byte aligned.
		sumStrSize				+= deAlign64(strSize, 256);
		//sumUpdateScratchSize	+= deAlign64(updateScratchSize, 256);	not used yet, disabled for future implementation
		sumBuildScratchSize		+= deAlign64(buildScratchSize, 256);
		sumVertexSize			+= deAlign64(vertexSize, 8);
		sumIndexSize			+= deAlign64(indexSize, 8);
	}
	return tcu::Vector<VkDeviceSize, 4>(sumStrSize, sumBuildScratchSize, sumVertexSize, sumIndexSize);
}
1991 
// Convenience overload: creates all pooled resources with no limit on the
// size of a single pooled buffer (negz marks the limit as "unset").
void BottomLevelAccelerationStructurePool::batchCreate (const DeviceInterface&		vkd,
														const VkDevice				device,
														Allocator&					allocator)
{
	batchCreateAdjust(vkd, device, allocator, negz<VkDeviceSize>(0));
}
1998 
// Allocates every pooled buffer and creates the acceleration-structure object
// for each member added via add(). May be called only once per pool. When
// maxBufferSize is a valid (non-negz) value, batch counts are recomputed so
// that no single pooled buffer exceeds that many bytes.
void BottomLevelAccelerationStructurePool::batchCreateAdjust (const DeviceInterface&	vkd,
															  const VkDevice			device,
															  Allocator&				allocator,
															  const VkDeviceSize		maxBufferSize)
{
	// Prevent a programmer from calling this method more than once.
	if (m_createOnce) DE_ASSERT(0);

	m_createOnce = true;
	DE_ASSERT(m_structs.size() != 0);

	// Structure storage: try cached host-visible memory first (when enabled)
	// and fall back to plain host-visible/coherent memory on NotSupportedError.
	auto createAccellerationStructureBuffer = [&](VkDeviceSize bufferSize) -> typename std::add_pointer<BufferWithMemory>::type
	{
		BufferWithMemory* res = nullptr;
		const VkBufferCreateInfo bci = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);

		if (m_tryCachedMemory) try
		{
			res = new BufferWithMemory(vkd, device, allocator, bci, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
		}
		catch (const tcu::NotSupportedError&)
		{
			res = nullptr;
		}

		return (nullptr != res)
				? res
				: (new BufferWithMemory(vkd, device, allocator, bci, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
	};

	// Single device scratch buffer, shared by all device-built structures.
	auto createDeviceScratchBuffer = [&](VkDeviceSize bufferSize) -> de::SharedPtr<BufferWithMemory>
	{
		const VkBufferCreateInfo bci = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
		BufferWithMemory* p = new BufferWithMemory(vkd, device, allocator, bci, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
		return de::SharedPtr<BufferWithMemory>(p);
	};

	// Buffer index -> accumulated byte size, one map per resource category.
	std::map<deUint32, VkDeviceSize>	accStrSizes;
	std::map<deUint32, VkDeviceSize>	vertBuffSizes;
	std::map<deUint32, VkDeviceSize>	indexBuffSizes;

	const deUint32	allStructsCount		= structCount();
	deUint32		iterKey				= 0;

	deUint32		batchStructCount	= m_batchStructCount;
	deUint32		batchVertexCount	= m_batchGeomCount ? m_batchGeomCount : m_batchStructCount;
	deUint32		batchIndexCount		= batchVertexCount;

	// A valid maxBufferSize overrides the configured batch counts.
	if (!isnegz(maxBufferSize))
	{
		deUint32	batches[4];
		adjustBatchCount(vkd, device, m_structs, m_infos, maxBufferSize, batches);
		batchStructCount	= batches[0];
		// batches[1]: batchScratchCount
		batchVertexCount	= batches[2];
		batchIndexCount		= batches[3];
	}

	// Running element counters; vertex/index only advance when the structure
	// actually has geometry of that kind.
	deUint32		iStr				= 0;
	deUint32		iVertex				= 0;
	deUint32		iIndex				= 0;

	VkDeviceSize	strSize				= 0;
	VkDeviceSize	updateScratchSize	= 0;
	VkDeviceSize	buildScratchSize	= 0;
	VkDeviceSize	maxBuildScratchSize	= 0;
	VkDeviceSize	vertexSize			= 0;
	VkDeviceSize	indexSize			= 0;

	// Offsets within the current buffer of each category; reset at batch boundaries.
	VkDeviceSize	strOffset			= 0;
	VkDeviceSize	vertexOffset		= 0;
	VkDeviceSize	indexOffset			= 0;

	deUint32		hostStructCount		= 0;
	deUint32		deviceStructCount	= 0;

	// First pass: compute each member's buffer index and offset, and total
	// per-buffer sizes.
	for (; iStr < allStructsCount; ++iStr)
	{
		BottomLevelAccelerationStructurePoolMember::Info info{};
		auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[iStr].get());
		std::tie(strSize, updateScratchSize, buildScratchSize, vertexSize, indexSize) = str.computeBuildSize(vkd, device, m_infos[iStr].structureSize);

		++(str.getBuildType() == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR ? hostStructCount : deviceStructCount);

		{
			const VkDeviceSize	alignedStrSize	= deAlign64(strSize, 256);
			const deUint32		accStrIndex		= (iStr / batchStructCount);
			if (iStr != 0 && (iStr % batchStructCount) == 0)
			{
				strOffset				= 0;
			}

			info.accStrIndex			= accStrIndex;
			info.accStrOffset			= strOffset;
			accStrSizes[accStrIndex]	+= alignedStrSize;
			strOffset					+= alignedStrSize;
			m_structsBuffSize			+= alignedStrSize;
		}

		if (buildScratchSize != 0)
		{
			// Scratch is not batched: a single buffer sized for the largest build.
			maxBuildScratchSize = std::max(maxBuildScratchSize, make_unsigned(deAlign64(buildScratchSize, 256u)));

			info.buildScratchBuffIndex		= 0;
			info.buildScratchBuffOffset		= 0;
		}

		if (vertexSize != 0)
		{
			const VkDeviceSize	alignedVertBuffSize	= deAlign64(vertexSize, 8);
			const deUint32		vertBuffIndex		= (iVertex / batchVertexCount);
			if (iVertex != 0 && (iVertex % batchVertexCount) == 0)
			{
				vertexOffset				= 0;
			}

			info.vertBuffIndex				= vertBuffIndex;
			info.vertBuffOffset				= vertexOffset;
			vertBuffSizes[vertBuffIndex]	+= alignedVertBuffSize;
			vertexOffset					+= alignedVertBuffSize;
			m_verticesSize					+= alignedVertBuffSize;
			iVertex							+= 1;
		}

		if (indexSize != 0)
		{
			const VkDeviceSize	alignedIndexBuffSize	= deAlign64(indexSize, 8);
			const deUint32		indexBuffIndex			= (iIndex / batchIndexCount);
			if (iIndex != 0 && (iIndex % batchIndexCount) == 0)
			{
				indexOffset					= 0;
			}

			info.indexBuffIndex				= indexBuffIndex;
			info.indexBuffOffset			= indexOffset;
			indexBuffSizes[indexBuffIndex]	+= alignedIndexBuffSize;
			indexOffset						+= alignedIndexBuffSize;
			m_indicesSize					+= alignedIndexBuffSize;
			iIndex							+= 1;
		}

		str.preCreateSetSizesAndOffsets(info, strSize, updateScratchSize, buildScratchSize);
	}

	// Second pass: allocate one buffer per distinct batch index in each category.
	for (iterKey = 0; iterKey < static_cast<deUint32>(accStrSizes.size()); ++iterKey)
	{
		m_impl->m_accellerationStructureBuffers.emplace_back(createAccellerationStructureBuffer(accStrSizes.at(iterKey)));
	}
	for (iterKey = 0; iterKey < static_cast<deUint32>(vertBuffSizes.size()); ++iterKey)
	{
		m_impl->m_vertexBuffers.emplace_back(createVertexBuffer(vkd, device, allocator, vertBuffSizes.at(iterKey)));
	}
	for (iterKey = 0; iterKey < static_cast<deUint32>(indexBuffSizes.size()); ++iterKey)
	{
		m_impl->m_indexBuffers.emplace_back(createIndexBuffer(vkd, device, allocator, indexBuffSizes.at(iterKey)));
	}

	// Scratch space: host vector for host builds, device buffer for device builds.
	if (maxBuildScratchSize)
	{
		if (hostStructCount)	m_impl->m_hostScratchBuffer->resize(static_cast<size_t>(maxBuildScratchSize));
		if (deviceStructCount)	m_impl->m_deviceScratchBuffer = createDeviceScratchBuffer(maxBuildScratchSize);

		m_buildsScratchSize = maxBuildScratchSize;
	}

	// Finally create the VkAccelerationStructureKHR objects themselves.
	for (iterKey = 0; iterKey < allStructsCount; ++iterKey)
	{
		auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[iterKey].get());
		str.createAccellerationStructure(vkd, device, m_infos[iterKey].deviceAddress);
	}
}
2170 
batchBuild(const DeviceInterface & vk,const VkDevice device,VkCommandBuffer cmdBuffer)2171 void BottomLevelAccelerationStructurePool::batchBuild (const DeviceInterface&	vk,
2172 													   const VkDevice			device,
2173 													   VkCommandBuffer			cmdBuffer)
2174 {
2175 	for (const auto& str : m_structs)
2176 	{
2177 		str->build(vk, device, cmdBuffer);
2178 	}
2179 }
2180 
batchBuild(const DeviceInterface & vk,const VkDevice device,VkCommandPool cmdPool,VkQueue queue,qpWatchDog * watchDog)2181 void BottomLevelAccelerationStructurePool::batchBuild (const DeviceInterface&	vk,
2182 													   const VkDevice			device,
2183 													   VkCommandPool			cmdPool,
2184 													   VkQueue					queue,
2185 													   qpWatchDog*				watchDog)
2186 {
2187 	const deUint32			limit	= 10000u;
2188 	const deUint32			count	= structCount();
2189 	std::vector<BlasPtr>	buildingOnDevice;
2190 
2191 	auto buildOnDevice = [&]() -> void
2192 	{
2193 		Move<VkCommandBuffer>	cmd = allocateCommandBuffer(vk, device, cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2194 
2195 		beginCommandBuffer(vk, *cmd, 0u);
2196 			for (const auto& str : buildingOnDevice)
2197 				str->build(vk, device, *cmd);
2198 		endCommandBuffer(vk, *cmd);
2199 
2200 		submitCommandsAndWait(vk, device, queue, *cmd);
2201 		vk.resetCommandPool(device, cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
2202 	};
2203 
2204 	buildingOnDevice.reserve(limit);
2205 	for (deUint32 i = 0; i < count; ++i)
2206 	{
2207 		auto str = m_structs[i];
2208 
2209 		if (str->getBuildType() == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR)
2210 			str->build(vk, device, DE_NULL);
2211 		else
2212 			buildingOnDevice.emplace_back(str);
2213 
2214 		if ( buildingOnDevice.size() == limit || (count - 1) == i)
2215 		{
2216 			buildOnDevice();
2217 			buildingOnDevice.clear();
2218 		}
2219 
2220 		if ((i % WATCHDOG_INTERVAL) == 0 && watchDog)
2221 			qpWatchDog_touch(watchDog);
2222 	}
2223 }
2224 
// Computes the sizes this structure needs, returned as the tuple
// (accStrSize, updateScratchSize, buildScratchSize, vertexSize, indexSize).
// When strSize is non-zero the member carries no geometry and strSize (256-
// aligned) is used directly; otherwise the sizes are queried from the driver
// via vkGetAccelerationStructureBuildSizesKHR for the stored geometries.
auto BottomLevelAccelerationStructurePoolMember::computeBuildSize (const DeviceInterface&	vk,
																   const VkDevice			device,
																   const VkDeviceSize		strSize) const
																   //              accStrSize,updateScratch,buildScratch, vertexSize, indexSize
																   -> std::tuple<VkDeviceSize, VkDeviceSize,VkDeviceSize,VkDeviceSize,VkDeviceSize>
{
	// Exactly one of {geometry data, explicit structure size} must be provided.
	DE_ASSERT(!m_geometriesData.empty() !=  !(strSize == 0)); // logical xor

	std::tuple<VkDeviceSize,VkDeviceSize,VkDeviceSize,VkDeviceSize,VkDeviceSize> result(deAlign64(strSize, 256), 0, 0, 0, 0);

	if (!m_geometriesData.empty())
	{
		std::vector<VkAccelerationStructureGeometryKHR>			accelerationStructureGeometriesKHR;
		std::vector<VkAccelerationStructureGeometryKHR*>		accelerationStructureGeometriesKHRPointers;
		std::vector<VkAccelerationStructureBuildRangeInfoKHR>	accelerationStructureBuildRangeInfoKHR;
		std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT> accelerationStructureGeometryMicromapsEXT;
		std::vector<deUint32>									maxPrimitiveCounts;
		prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers, accelerationStructureBuildRangeInfoKHR, accelerationStructureGeometryMicromapsEXT, maxPrimitiveCounts);

		const VkAccelerationStructureGeometryKHR*				accelerationStructureGeometriesKHRPointer	= accelerationStructureGeometriesKHR.data();
		const VkAccelerationStructureGeometryKHR* const*		accelerationStructureGeometry				= accelerationStructureGeometriesKHRPointers.data();

		// Geometries are passed either as a plain array or as an array of
		// pointers depending on the configured mode; the other field is null.
		VkAccelerationStructureBuildGeometryInfoKHR	accelerationStructureBuildGeometryInfoKHR	=
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,			//  VkStructureType										sType;
			DE_NULL,																	//  const void*											pNext;
			VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,							//  VkAccelerationStructureTypeKHR						type;
			m_buildFlags,																//  VkBuildAccelerationStructureFlagsKHR				flags;
			VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,								//  VkBuildAccelerationStructureModeKHR					mode;
			DE_NULL,																	//  VkAccelerationStructureKHR							srcAccelerationStructure;
			DE_NULL,																	//  VkAccelerationStructureKHR							dstAccelerationStructure;
			static_cast<deUint32>(accelerationStructureGeometriesKHR.size()),			//  deUint32											geometryCount;
			m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer,	//  const VkAccelerationStructureGeometryKHR*			pGeometries;
			m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL,				//  const VkAccelerationStructureGeometryKHR* const*	ppGeometries;
			makeDeviceOrHostAddressKHR(DE_NULL)											//  VkDeviceOrHostAddressKHR							scratchData;
		};

		VkAccelerationStructureBuildSizesInfoKHR sizeInfo =
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,	//  VkStructureType	sType;
			DE_NULL,														//  const void*		pNext;
			0,																//  VkDeviceSize	accelerationStructureSize;
			0,																//  VkDeviceSize	updateScratchSize;
			0																//  VkDeviceSize	buildScratchSize;
		};

		vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);

		std::get<0>(result) = sizeInfo.accelerationStructureSize;
		std::get<1>(result) = sizeInfo.updateScratchSize;
		std::get<2>(result) = sizeInfo.buildScratchSize;
		std::get<3>(result) = getVertexBufferSize(m_geometriesData);
		std::get<4>(result) = getIndexBufferSize(m_geometriesData);
	}

	return result;
}
2282 
preCreateSetSizesAndOffsets(const Info & info,const VkDeviceSize accStrSize,const VkDeviceSize updateScratchSize,const VkDeviceSize buildScratchSize)2283 void BottomLevelAccelerationStructurePoolMember::preCreateSetSizesAndOffsets (const Info&			info,
2284 																			  const VkDeviceSize	accStrSize,
2285 																			  const VkDeviceSize	updateScratchSize,
2286 																			  const VkDeviceSize	buildScratchSize)
2287 {
2288 	m_info				= info;
2289 	m_structureSize		= accStrSize;
2290 	m_updateScratchSize	= updateScratchSize;
2291 	m_buildScratchSize	= buildScratchSize;
2292 }
2293 
// Creates the VkAccelerationStructureKHR object inside the pooled buffer at
// the offset computed earlier by batchCreateAdjust().
void BottomLevelAccelerationStructurePoolMember::createAccellerationStructure (const DeviceInterface&	vk,
																			   const VkDevice			device,
																			   VkDeviceAddress			deviceAddress)
{
	// GENERIC type allows the structure to be created before its final usage
	// is known; otherwise a plain bottom-level structure is created.
	const VkAccelerationStructureTypeKHR		structureType						= (m_createGeneric
																					   ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
																					   : VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
	const VkAccelerationStructureCreateInfoKHR	accelerationStructureCreateInfoKHR
	{
		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,						//  VkStructureType											sType;
		DE_NULL,																		//  const void*												pNext;
		m_createFlags,																	//  VkAccelerationStructureCreateFlagsKHR					createFlags;
		getAccelerationStructureBuffer()->get(),										//  VkBuffer												buffer;
		getAccelerationStructureBufferOffset(),											//  VkDeviceSize											offset;
		m_structureSize,																//  VkDeviceSize											size;
		structureType,																	//  VkAccelerationStructureTypeKHR							type;
		deviceAddress																	//  VkDeviceAddress											deviceAddress;
	};

	m_accelerationStructureKHR	= createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);
}
2315 
// Intentionally empty; members release their resources automatically.
TopLevelAccelerationStructure::~TopLevelAccelerationStructure ()
{
}
2319 
// All cached sizes start at zero; they are filled in once the structure's
// build sizes are known.
TopLevelAccelerationStructure::TopLevelAccelerationStructure ()
	: m_structureSize		(0u)
	, m_updateScratchSize	(0u)
	, m_buildScratchSize	(0u)
{
}
2326 
setInstanceCount(const size_t instanceCount)2327 void TopLevelAccelerationStructure::setInstanceCount (const size_t instanceCount)
2328 {
2329 	m_bottomLevelInstances.reserve(instanceCount);
2330 	m_instanceData.reserve(instanceCount);
2331 }
2332 
addInstance(de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelStructure,const VkTransformMatrixKHR & matrix,deUint32 instanceCustomIndex,deUint32 mask,deUint32 instanceShaderBindingTableRecordOffset,VkGeometryInstanceFlagsKHR flags)2333 void TopLevelAccelerationStructure::addInstance (de::SharedPtr<BottomLevelAccelerationStructure>	bottomLevelStructure,
2334 												 const VkTransformMatrixKHR&						matrix,
2335 												 deUint32											instanceCustomIndex,
2336 												 deUint32											mask,
2337 												 deUint32											instanceShaderBindingTableRecordOffset,
2338 												 VkGeometryInstanceFlagsKHR							flags)
2339 {
2340 	m_bottomLevelInstances.push_back(bottomLevelStructure);
2341 	m_instanceData.push_back(InstanceData(matrix, instanceCustomIndex, mask, instanceShaderBindingTableRecordOffset, flags));
2342 }
2343 
getStructureBuildSizes() const2344 VkAccelerationStructureBuildSizesInfoKHR TopLevelAccelerationStructure::getStructureBuildSizes () const
2345 {
2346 	return
2347 	{
2348 		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,	//  VkStructureType	sType;
2349 		DE_NULL,														//  const void*		pNext;
2350 		m_structureSize,												//  VkDeviceSize	accelerationStructureSize;
2351 		m_updateScratchSize,											//  VkDeviceSize	updateScratchSize;
2352 		m_buildScratchSize												//  VkDeviceSize	buildScratchSize;
2353 	};
2354 }
2355 
// Convenience helper: creates the structure (size 0 lets create() compute it)
// and immediately records/performs its build.
void TopLevelAccelerationStructure::createAndBuild (const DeviceInterface&	vk,
													const VkDevice			device,
													const VkCommandBuffer	cmdBuffer,
													Allocator&				allocator,
													VkDeviceAddress			deviceAddress)
{
	create(vk, device, allocator, 0u, deviceAddress);
	build(vk, device, cmdBuffer);
}
2365 
createAndCopyFrom(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,Allocator & allocator,TopLevelAccelerationStructure * accelerationStructure,VkDeviceSize compactCopySize,VkDeviceAddress deviceAddress)2366 void TopLevelAccelerationStructure::createAndCopyFrom (const DeviceInterface&				vk,
2367 													   const VkDevice						device,
2368 													   const VkCommandBuffer				cmdBuffer,
2369 													   Allocator&							allocator,
2370 													   TopLevelAccelerationStructure*		accelerationStructure,
2371 													   VkDeviceSize							compactCopySize,
2372 													   VkDeviceAddress						deviceAddress)
2373 {
2374 	DE_ASSERT(accelerationStructure != NULL);
2375 	VkDeviceSize copiedSize = compactCopySize > 0u ? compactCopySize : accelerationStructure->getStructureBuildSizes().accelerationStructureSize;
2376 	DE_ASSERT(copiedSize != 0u);
2377 
2378 	create(vk, device, allocator, copiedSize, deviceAddress);
2379 	copyFrom(vk, device, cmdBuffer, accelerationStructure, compactCopySize > 0u);
2380 }
2381 
createAndDeserializeFrom(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,Allocator & allocator,SerialStorage * storage,VkDeviceAddress deviceAddress)2382 void TopLevelAccelerationStructure::createAndDeserializeFrom (const DeviceInterface&					vk,
2383 															  const VkDevice							device,
2384 															  const VkCommandBuffer						cmdBuffer,
2385 															  Allocator&								allocator,
2386 															  SerialStorage*							storage,
2387 															  VkDeviceAddress							deviceAddress)
2388 {
2389 	DE_ASSERT(storage != NULL);
2390 	DE_ASSERT(storage->getStorageSize() >= SerialStorage::SERIAL_STORAGE_SIZE_MIN);
2391 	create(vk, device, allocator, storage->getDeserializedSize(), deviceAddress);
2392 	if (storage->hasDeepFormat()) createAndDeserializeBottoms(vk, device, cmdBuffer, allocator, storage);
2393 	deserialize(vk, device, cmdBuffer, storage);
2394 }
2395 
createInstanceBuffer(const DeviceInterface & vk,const VkDevice device,Allocator & allocator,std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> bottomLevelInstances,std::vector<InstanceData> instanceData,const bool tryCachedMemory)2396 BufferWithMemory* createInstanceBuffer (const DeviceInterface&											vk,
2397 										const VkDevice													device,
2398 										Allocator&														allocator,
2399 										std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >	bottomLevelInstances,
2400 										std::vector<InstanceData>										instanceData,
2401 										const bool														tryCachedMemory)
2402 {
2403 	DE_ASSERT(bottomLevelInstances.size() != 0);
2404 	DE_ASSERT(bottomLevelInstances.size() == instanceData.size());
2405 	DE_UNREF(instanceData);
2406 
2407 	BufferWithMemory*			result				= nullptr;
2408 	const VkDeviceSize			bufferSizeBytes		= bottomLevelInstances.size() * sizeof(VkAccelerationStructureInstanceKHR);
2409 	const VkBufferCreateInfo	bufferCreateInfo	= makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
2410 	if (tryCachedMemory) try
2411 	{
2412 		result = new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
2413 	}
2414 	catch (const tcu::NotSupportedError&)
2415 	{
2416 		result = nullptr;
2417 	}
2418 	return result
2419 			? result
2420 			: new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
2421 }
2422 
updateSingleInstance(const DeviceInterface & vk,const VkDevice device,const BottomLevelAccelerationStructure & bottomLevelAccelerationStructure,const InstanceData & instanceData,deUint8 * bufferLocation,VkAccelerationStructureBuildTypeKHR buildType,bool inactiveInstances)2423 void updateSingleInstance (const DeviceInterface&					vk,
2424 						   const VkDevice							device,
2425 						   const BottomLevelAccelerationStructure&	bottomLevelAccelerationStructure,
2426 						   const InstanceData&						instanceData,
2427 						   deUint8*									bufferLocation,
2428 						   VkAccelerationStructureBuildTypeKHR		buildType,
2429 						   bool										inactiveInstances)
2430 {
2431 	const VkAccelerationStructureKHR accelerationStructureKHR = *bottomLevelAccelerationStructure.getPtr();
2432 
2433 	// This part needs to be fixed once a new version of the VkAccelerationStructureInstanceKHR will be added to vkStructTypes.inl
2434 	VkDeviceAddress accelerationStructureAddress;
2435 	if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2436 	{
2437 		VkAccelerationStructureDeviceAddressInfoKHR asDeviceAddressInfo =
2438 		{
2439 			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,	// VkStructureType				sType;
2440 			DE_NULL,															// const void*					pNext;
2441 			accelerationStructureKHR											// VkAccelerationStructureKHR	accelerationStructure;
2442 		};
2443 		accelerationStructureAddress = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
2444 	}
2445 
2446 	deUint64 structureReference;
2447 	if (inactiveInstances)
2448 	{
2449 		// Instances will be marked inactive by making their references VK_NULL_HANDLE or having address zero.
2450 		structureReference = 0ull;
2451 	}
2452 	else
2453 	{
2454 		structureReference	= (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2455 							? deUint64(accelerationStructureAddress)
2456 							: deUint64(accelerationStructureKHR.getInternal());
2457 	}
2458 
2459 	VkAccelerationStructureInstanceKHR	accelerationStructureInstanceKHR = makeVkAccelerationStructureInstanceKHR
2460 	(
2461 		instanceData.matrix,									//  VkTransformMatrixKHR		transform;
2462 		instanceData.instanceCustomIndex,						//  deUint32					instanceCustomIndex:24;
2463 		instanceData.mask,										//  deUint32					mask:8;
2464 		instanceData.instanceShaderBindingTableRecordOffset,	//  deUint32					instanceShaderBindingTableRecordOffset:24;
2465 		instanceData.flags,										//  VkGeometryInstanceFlagsKHR	flags:8;
2466 		structureReference										//  deUint64					accelerationStructureReference;
2467 	);
2468 
2469 	deMemcpy(bufferLocation, &accelerationStructureInstanceKHR, sizeof(VkAccelerationStructureInstanceKHR));
2470 }
2471 
updateInstanceBuffer(const DeviceInterface & vk,const VkDevice device,const std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> & bottomLevelInstances,const std::vector<InstanceData> & instanceData,const BufferWithMemory * instanceBuffer,VkAccelerationStructureBuildTypeKHR buildType,bool inactiveInstances)2472 void updateInstanceBuffer (const DeviceInterface&												vk,
2473 						   const VkDevice														device,
2474 						   const std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>&	bottomLevelInstances,
2475 						   const std::vector<InstanceData>&										instanceData,
2476 						   const BufferWithMemory*												instanceBuffer,
2477 						   VkAccelerationStructureBuildTypeKHR									buildType,
2478 						   bool																	inactiveInstances)
2479 {
2480 	DE_ASSERT(bottomLevelInstances.size() != 0);
2481 	DE_ASSERT(bottomLevelInstances.size() == instanceData.size());
2482 
2483 	auto&			instancesAlloc		= instanceBuffer->getAllocation();
2484 	auto			bufferStart			= reinterpret_cast<deUint8*>(instancesAlloc.getHostPtr());
2485 	VkDeviceSize	bufferOffset		= 0ull;
2486 
2487 	for (size_t instanceNdx = 0; instanceNdx < bottomLevelInstances.size(); ++instanceNdx)
2488 	{
2489 		const auto& blas = *bottomLevelInstances[instanceNdx];
2490 		updateSingleInstance(vk, device, blas, instanceData[instanceNdx], bufferStart + bufferOffset, buildType, inactiveInstances);
2491 		bufferOffset += sizeof(VkAccelerationStructureInstanceKHR);
2492 	}
2493 
2494 	flushMappedMemoryRange(vk, device, instancesAlloc.getMemory(), instancesAlloc.getOffset(), VK_WHOLE_SIZE);
2495 }
2496 
// KHR implementation of the abstract TopLevelAccelerationStructure interface.
// Owns the VkAccelerationStructureKHR handle together with the buffers that
// back it: the AS storage buffer, the instance buffer (packed array of
// VkAccelerationStructureInstanceKHR), an optional array-of-pointers buffer
// and the build scratch memory (device buffer or host vector depending on
// the configured build type).
class TopLevelAccelerationStructureKHR : public TopLevelAccelerationStructure
{
public:
	// Worst-case number of device-memory allocations a single TLAS may need.
	static deUint32											getRequiredAllocationCount							(void);

															TopLevelAccelerationStructureKHR					();
															TopLevelAccelerationStructureKHR					(const TopLevelAccelerationStructureKHR&		other) = delete;
	virtual													~TopLevelAccelerationStructureKHR					();

	// Configuration setters. They only record state; they must be called
	// before create()/build() to take effect.
	void													setBuildType										(const VkAccelerationStructureBuildTypeKHR		buildType) override;
	void													setCreateFlags										(const VkAccelerationStructureCreateFlagsKHR	createFlags) override;
	void													setCreateGeneric									(bool											createGeneric) override;
	void													setCreationBufferUnbounded							(bool											creationBufferUnbounded) override;
	void													setBuildFlags										(const VkBuildAccelerationStructureFlagsKHR		buildFlags) override;
	void													setBuildWithoutPrimitives							(bool											buildWithoutPrimitives) override;
	void													setInactiveInstances								(bool											inactiveInstances) override;
	void													setDeferredOperation								(const bool										deferredOperation,
																												 const deUint32									workerThreadCount) override;
	void													setUseArrayOfPointers								(const bool										useArrayOfPointers) override;
	void													setIndirectBuildParameters							(const VkBuffer									indirectBuffer,
																												 const VkDeviceSize								indirectBufferOffset,
																												 const deUint32									indirectBufferStride) override;
	void													setUsePPGeometries									(const bool										usePPGeometries) override;
	void													setTryCachedMemory									(const bool										tryCachedMemory) override;
	VkBuildAccelerationStructureFlagsKHR					getBuildFlags										() const override;

	// Queries the sizes needed to create this TLAS (structure, scratch,
	// instance buffers) without actually creating anything.
	void													getCreationSizes									(const DeviceInterface&							vk,
																												 const VkDevice									device,
																												 const VkDeviceSize								structureSize,
																												 CreationSizes&									sizes) override;
	// Creates the VkAccelerationStructureKHR object and its backing buffers.
	// structureSize == 0 means "compute the size from the added instances";
	// a non-zero size is used when the TLAS will be copied/deserialized into.
	void													create												(const DeviceInterface&							vk,
																												 const VkDevice									device,
																												 Allocator&										allocator,
																												 VkDeviceSize									structureSize,
																												 VkDeviceAddress								deviceAddress			= 0u,
																												 const void*									pNext					= DE_NULL,
																												 const MemoryRequirement&						addMemoryRequirement	= MemoryRequirement::Any,
																												 const VkBuffer									creationBuffer			= VK_NULL_HANDLE,
																												 const VkDeviceSize								creationBufferSize		= 0u) override;
	// Builds (or, with a non-null source, updates) the TLAS contents.
	void													build												(const DeviceInterface&							vk,
																												 const VkDevice									device,
																												 const VkCommandBuffer							cmdBuffer,
																												 TopLevelAccelerationStructure*					srcAccelerationStructure = DE_NULL) override;
	void													copyFrom											(const DeviceInterface&							vk,
																												 const VkDevice									device,
																												 const VkCommandBuffer							cmdBuffer,
																												 TopLevelAccelerationStructure*					accelerationStructure,
																												 bool											compactCopy) override;
	void													serialize											(const DeviceInterface&							vk,
																												 const VkDevice									device,
																												 const VkCommandBuffer							cmdBuffer,
																												 SerialStorage*									storage) override;
	void													deserialize											(const DeviceInterface&							vk,
																												 const VkDevice									device,
																												 const VkCommandBuffer							cmdBuffer,
																												 SerialStorage*									storage) override;

	std::vector<VkDeviceSize>								getSerializingSizes									(const DeviceInterface&							vk,
																												 const VkDevice									device,
																												 const VkQueue									queue,
																												 const deUint32									queueFamilyIndex) override;

	std::vector<deUint64>									getSerializingAddresses								(const DeviceInterface&							vk,
																												 const VkDevice									device) const override;


	const VkAccelerationStructureKHR*						getPtr												(void) const override;

	// Rewrites the transform of a single already-built instance in the
	// host-visible instance buffer.
	void													updateInstanceMatrix								(const DeviceInterface&							vk,
																												 const VkDevice									device,
																												 size_t											instanceIndex,
																												 const VkTransformMatrixKHR&					matrix) override;

protected:
	VkAccelerationStructureBuildTypeKHR						m_buildType;				// device or host build path
	VkAccelerationStructureCreateFlagsKHR					m_createFlags;
	bool													m_createGeneric;			// create with VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
	bool													m_creationBufferUnbounded;	// defer binding of the AS buffer memory until after creation
	VkBuildAccelerationStructureFlagsKHR					m_buildFlags;
	bool													m_buildWithoutPrimitives;	// build with primitiveCount forced to zero
	bool													m_inactiveInstances;		// write null BLAS references into the instance buffer
	bool													m_deferredOperation;		// use VkDeferredOperationKHR for host builds
	deUint32												m_workerThreadCount;
	bool													m_useArrayOfPointers;		// reference instances via an extra address buffer
	de::MovePtr<BufferWithMemory>							m_accelerationStructureBuffer;
	de::MovePtr<BufferWithMemory>							m_instanceBuffer;
	de::MovePtr<BufferWithMemory>							m_instanceAddressBuffer;
	de::MovePtr<BufferWithMemory>							m_deviceScratchBuffer;
	std::vector<deUint8>									m_hostScratchBuffer;		// scratch storage for host builds
	Move<VkAccelerationStructureKHR>						m_accelerationStructureKHR;
	VkBuffer												m_indirectBuffer;			// non-null enables vkCmdBuildAccelerationStructuresIndirectKHR
	VkDeviceSize											m_indirectBufferOffset;
	deUint32												m_indirectBufferStride;
	bool													m_usePPGeometries;			// pass geometries via ppGeometries instead of pGeometries
	bool													m_tryCachedMemory;			// prefer cached host memory for the instance buffer


	// Fills in the instances geometry description and per-geometry max
	// primitive counts used by size queries and builds.
	void													prepareInstances									(const DeviceInterface&							vk,
																												 const VkDevice									device,
																												 VkAccelerationStructureGeometryKHR&			accelerationStructureGeometryKHR,
																												 std::vector<deUint32>&							maxPrimitiveCounts);

	void													serializeBottoms									(const DeviceInterface&							vk,
																												 const VkDevice									device,
																												 const VkCommandBuffer							cmdBuffer,
																												 SerialStorage*									storage,
																												 VkDeferredOperationKHR							deferredOperation);

	void													createAndDeserializeBottoms							(const DeviceInterface&							vk,
																												 const VkDevice									device,
																												 const VkCommandBuffer							cmdBuffer,
																												 Allocator&										allocator,
																												 SerialStorage*									storage) override;
};
2611 
getRequiredAllocationCount(void)2612 deUint32 TopLevelAccelerationStructureKHR::getRequiredAllocationCount (void)
2613 {
2614 	/*
2615 		de::MovePtr<BufferWithMemory>							m_instanceBuffer;
2616 		de::MovePtr<Allocation>									m_accelerationStructureAlloc;
2617 		de::MovePtr<BufferWithMemory>							m_deviceScratchBuffer;
2618 	*/
2619 	return 3u;
2620 }
2621 
// Default-constructs an empty TLAS wrapper with device build type and all
// optional behaviors disabled, except that cached host memory is attempted
// for the instance buffer by default.
TopLevelAccelerationStructureKHR::TopLevelAccelerationStructureKHR ()
	: TopLevelAccelerationStructure	()
	, m_buildType					(VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)	// build on the device unless overridden
	, m_createFlags					(0u)
	, m_createGeneric				(false)
	, m_creationBufferUnbounded		(false)
	, m_buildFlags					(0u)
	, m_buildWithoutPrimitives		(false)
	, m_inactiveInstances			(false)
	, m_deferredOperation			(false)
	, m_workerThreadCount			(0)
	, m_useArrayOfPointers			(false)
	, m_accelerationStructureBuffer	(DE_NULL)
	, m_instanceBuffer				(DE_NULL)
	, m_instanceAddressBuffer		(DE_NULL)
	, m_deviceScratchBuffer			(DE_NULL)
	, m_accelerationStructureKHR	()
	, m_indirectBuffer				(DE_NULL)											// null disables indirect builds
	, m_indirectBufferOffset		(0)
	, m_indirectBufferStride		(0)
	, m_usePPGeometries				(false)
	, m_tryCachedMemory				(true)												// prefer cached memory; create falls back if unsupported
{
}
2646 
~TopLevelAccelerationStructureKHR()2647 TopLevelAccelerationStructureKHR::~TopLevelAccelerationStructureKHR ()
2648 {
2649 }
2650 
setBuildType(const VkAccelerationStructureBuildTypeKHR buildType)2651 void TopLevelAccelerationStructureKHR::setBuildType (const VkAccelerationStructureBuildTypeKHR	buildType)
2652 {
2653 	m_buildType = buildType;
2654 }
2655 
setCreateFlags(const VkAccelerationStructureCreateFlagsKHR createFlags)2656 void TopLevelAccelerationStructureKHR::setCreateFlags (const VkAccelerationStructureCreateFlagsKHR	createFlags)
2657 {
2658 	m_createFlags = createFlags;
2659 }
2660 
setCreateGeneric(bool createGeneric)2661 void TopLevelAccelerationStructureKHR::setCreateGeneric (bool createGeneric)
2662 {
2663 	m_createGeneric = createGeneric;
2664 }
2665 
setCreationBufferUnbounded(bool creationBufferUnbounded)2666 void TopLevelAccelerationStructureKHR::setCreationBufferUnbounded (bool creationBufferUnbounded)
2667 {
2668 	m_creationBufferUnbounded = creationBufferUnbounded;
2669 }
2670 
setInactiveInstances(bool inactiveInstances)2671 void TopLevelAccelerationStructureKHR::setInactiveInstances (bool inactiveInstances)
2672 {
2673 	m_inactiveInstances = inactiveInstances;
2674 }
2675 
setBuildFlags(const VkBuildAccelerationStructureFlagsKHR buildFlags)2676 void TopLevelAccelerationStructureKHR::setBuildFlags (const VkBuildAccelerationStructureFlagsKHR	buildFlags)
2677 {
2678 	m_buildFlags = buildFlags;
2679 }
2680 
setBuildWithoutPrimitives(bool buildWithoutPrimitives)2681 void TopLevelAccelerationStructureKHR::setBuildWithoutPrimitives (bool buildWithoutPrimitives)
2682 {
2683 	m_buildWithoutPrimitives = buildWithoutPrimitives;
2684 }
2685 
setDeferredOperation(const bool deferredOperation,const deUint32 workerThreadCount)2686 void TopLevelAccelerationStructureKHR::setDeferredOperation (const bool		deferredOperation,
2687 															 const deUint32	workerThreadCount)
2688 {
2689 	m_deferredOperation = deferredOperation;
2690 	m_workerThreadCount = workerThreadCount;
2691 }
2692 
setUseArrayOfPointers(const bool useArrayOfPointers)2693 void TopLevelAccelerationStructureKHR::setUseArrayOfPointers (const bool	useArrayOfPointers)
2694 {
2695 	m_useArrayOfPointers = useArrayOfPointers;
2696 }
2697 
setUsePPGeometries(const bool usePPGeometries)2698 void TopLevelAccelerationStructureKHR::setUsePPGeometries (const bool usePPGeometries)
2699 {
2700 	m_usePPGeometries = usePPGeometries;
2701 }
2702 
setTryCachedMemory(const bool tryCachedMemory)2703 void TopLevelAccelerationStructureKHR::setTryCachedMemory (const bool tryCachedMemory)
2704 {
2705 	m_tryCachedMemory = tryCachedMemory;
2706 }
2707 
setIndirectBuildParameters(const VkBuffer indirectBuffer,const VkDeviceSize indirectBufferOffset,const deUint32 indirectBufferStride)2708 void TopLevelAccelerationStructureKHR::setIndirectBuildParameters (const VkBuffer		indirectBuffer,
2709 																   const VkDeviceSize	indirectBufferOffset,
2710 																   const deUint32		indirectBufferStride)
2711 {
2712 	m_indirectBuffer		= indirectBuffer;
2713 	m_indirectBufferOffset	= indirectBufferOffset;
2714 	m_indirectBufferStride	= indirectBufferStride;
2715 }
2716 
getBuildFlags() const2717 VkBuildAccelerationStructureFlagsKHR TopLevelAccelerationStructureKHR::getBuildFlags () const
2718 {
2719 	return m_buildFlags;
2720 }
2721 
sum() const2722 VkDeviceSize TopLevelAccelerationStructure::CreationSizes::sum () const
2723 {
2724 	return structure + updateScratch + buildScratch + instancePointers + instancesBuffer;
2725 }
2726 
// Computes the buffer sizes needed to create this TLAS without creating it.
// structureSize == 0 means "derive sizes from the added instances via
// vkGetAccelerationStructureBuildSizesKHR"; a non-zero value is used for
// structures that will be copied/compacted/deserialized into, where only the
// destination size is known. Scratch/instance sizes are zeroed accordingly.
void TopLevelAccelerationStructureKHR::getCreationSizes (const DeviceInterface&	vk,
														 const VkDevice			device,
														 const VkDeviceSize		structureSize,
														 CreationSizes&			sizes)
{
	// AS may be built from geometries using vkCmdBuildAccelerationStructureKHR / vkBuildAccelerationStructureKHR
	// or may be copied/compacted/deserialized from other AS ( in this case AS does not need geometries, but it needs to know its size before creation ).
	DE_ASSERT(!m_bottomLevelInstances.empty() != !(structureSize == 0)); // logical xor

	if (structureSize == 0)
	{
		VkAccelerationStructureGeometryKHR		accelerationStructureGeometryKHR;
		const auto								accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
		std::vector<deUint32>					maxPrimitiveCounts;
		prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);

		// Only the geometry, flags and mode fields matter for the size query;
		// src/dst handles and scratch address may be null here.
		VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR		=
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,						//  VkStructureType										sType;
			DE_NULL,																				//  const void*											pNext;
			VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,											//  VkAccelerationStructureTypeKHR						type;
			m_buildFlags,																			//  VkBuildAccelerationStructureFlagsKHR				flags;
			VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,											//  VkBuildAccelerationStructureModeKHR					mode;
			DE_NULL,																				//  VkAccelerationStructureKHR							srcAccelerationStructure;
			DE_NULL,																				//  VkAccelerationStructureKHR							dstAccelerationStructure;
			1u,																						//  deUint32											geometryCount;
			(m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR),						//  const VkAccelerationStructureGeometryKHR*			pGeometries;
			(m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr),					//  const VkAccelerationStructureGeometryKHR* const*	ppGeometries;
			makeDeviceOrHostAddressKHR(DE_NULL)														//  VkDeviceOrHostAddressKHR							scratchData;
		};

		VkAccelerationStructureBuildSizesInfoKHR	sizeInfo =
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,	//  VkStructureType	sType;
			DE_NULL,														//  const void*		pNext;
			0,																//  VkDeviceSize	accelerationStructureSize;
			0,																//  VkDeviceSize	updateScratchSize;
			0																//  VkDeviceSize	buildScratchSize;
		};

		vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);

		sizes.structure		= sizeInfo.accelerationStructureSize;
		sizes.updateScratch	= sizeInfo.updateScratchSize;
		sizes.buildScratch	= sizeInfo.buildScratchSize;
	}
	else
	{
		// Caller supplied the destination size; no build, hence no scratch.
		sizes.structure		= structureSize;
		sizes.updateScratch	= 0u;
		sizes.buildScratch	= 0u;
	}

	// Optional array-of-pointers buffer: one device address (device builds)
	// or one host pointer (host builds) per instance.
	sizes.instancePointers	= 0u;
	if (m_useArrayOfPointers)
	{
		const size_t	pointerSize = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) ? sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress) : sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
		sizes.instancePointers		= static_cast<VkDeviceSize>(m_bottomLevelInstances.size() * pointerSize);
	}

	// Packed instance records, one per bottom-level structure.
	sizes.instancesBuffer = m_bottomLevelInstances.empty() ? 0u : m_bottomLevelInstances.size() * sizeof(VkAccelerationStructureInstanceKHR);
}
2789 
// Creates the VkAccelerationStructureKHR object and all backing buffers.
// With structureSize == 0 the required sizes are queried from the driver based
// on the added instances; a non-zero structureSize is used when the TLAS will
// receive its contents via copy/compaction/deserialization. An external
// creationBuffer (with creationBufferSize) may be supplied instead of letting
// this function allocate the storage buffer itself.
void TopLevelAccelerationStructureKHR::create (const DeviceInterface&				vk,
											   const VkDevice						device,
											   Allocator&							allocator,
											   VkDeviceSize							structureSize,
											   VkDeviceAddress						deviceAddress,
											   const void*							pNext,
											   const MemoryRequirement&				addMemoryRequirement,
											   const VkBuffer						creationBuffer,
											   const VkDeviceSize					creationBufferSize)
{
	// AS may be built from geometries using vkCmdBuildAccelerationStructureKHR / vkBuildAccelerationStructureKHR
	// or may be copied/compacted/deserialized from other AS ( in this case AS does not need geometries, but it needs to know its size before creation ).
	DE_ASSERT(!m_bottomLevelInstances.empty() != !(structureSize == 0)); // logical xor

	if (structureSize == 0)
	{
		// Ask the driver for structure/scratch sizes based on the instance geometry.
		VkAccelerationStructureGeometryKHR		accelerationStructureGeometryKHR;
		const auto								accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
		std::vector<deUint32>					maxPrimitiveCounts;
		prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);

		VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR		=
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,						//  VkStructureType										sType;
			DE_NULL,																				//  const void*											pNext;
			VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,											//  VkAccelerationStructureTypeKHR						type;
			m_buildFlags,																			//  VkBuildAccelerationStructureFlagsKHR				flags;
			VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,											//  VkBuildAccelerationStructureModeKHR					mode;
			DE_NULL,																				//  VkAccelerationStructureKHR							srcAccelerationStructure;
			DE_NULL,																				//  VkAccelerationStructureKHR							dstAccelerationStructure;
			1u,																						//  deUint32											geometryCount;
			(m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR),						//  const VkAccelerationStructureGeometryKHR*			pGeometries;
			(m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr),					//  const VkAccelerationStructureGeometryKHR* const*	ppGeometries;
			makeDeviceOrHostAddressKHR(DE_NULL)														//  VkDeviceOrHostAddressKHR							scratchData;
		};

		VkAccelerationStructureBuildSizesInfoKHR	sizeInfo =
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,	//  VkStructureType	sType;
			DE_NULL,														//  const void*		pNext;
			0,																//  VkDeviceSize	accelerationStructureSize;
			0,																//  VkDeviceSize	updateScratchSize;
			0																//  VkDeviceSize	buildScratchSize;
		};

		vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);

		m_structureSize		= sizeInfo.accelerationStructureSize;
		m_updateScratchSize	= sizeInfo.updateScratchSize;
		m_buildScratchSize	= sizeInfo.buildScratchSize;
	}
	else
	{
		// Size given by the caller; no build from geometry, hence no scratch.
		m_structureSize		= structureSize;
		m_updateScratchSize	= 0u;
		m_buildScratchSize	= 0u;
	}

	const bool externalCreationBuffer = (creationBuffer != VK_NULL_HANDLE);

	if (externalCreationBuffer)
	{
		DE_UNREF(creationBufferSize); // For release builds.
		DE_ASSERT(creationBufferSize >= m_structureSize);
	}

	if (!externalCreationBuffer)
	{
		const VkBufferCreateInfo	bufferCreateInfo	= makeBufferCreateInfo(m_structureSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
		const MemoryRequirement		memoryRequirement	= addMemoryRequirement | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress;
		// When the creation buffer is "unbounded", memory is bound later,
		// after the acceleration structure object exists.
		const bool					bindMemOnCreation	= (!m_creationBufferUnbounded);

		try
		{
			m_accelerationStructureBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, (MemoryRequirement::Cached | memoryRequirement), bindMemOnCreation));
		}
		catch (const tcu::NotSupportedError&)
		{
			// retry without Cached flag
			m_accelerationStructureBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, memoryRequirement, bindMemOnCreation));
		}
	}

	const auto createInfoBuffer = (externalCreationBuffer ? creationBuffer : m_accelerationStructureBuffer->get());
	{
		const VkAccelerationStructureTypeKHR		structureType						= (m_createGeneric
																						   ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
																						   : VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR);
		const VkAccelerationStructureCreateInfoKHR	accelerationStructureCreateInfoKHR	=
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,	//  VkStructureType											sType;
			pNext,														//  const void*												pNext;
			m_createFlags,												//  VkAccelerationStructureCreateFlagsKHR					createFlags;
			createInfoBuffer,											//  VkBuffer												buffer;
			0u,															//  VkDeviceSize											offset;
			m_structureSize,											//  VkDeviceSize											size;
			structureType,												//  VkAccelerationStructureTypeKHR							type;
			deviceAddress												//  VkDeviceAddress											deviceAddress;
		};

		m_accelerationStructureKHR	= createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);

		// Make sure buffer memory is always bound after creation.
		if (!externalCreationBuffer)
			m_accelerationStructureBuffer->bindMemory();
	}

	// Build scratch: a device buffer for device builds, a host vector otherwise.
	if (m_buildScratchSize > 0u)
	{
		if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
		{
			const VkBufferCreateInfo		bufferCreateInfo	= makeBufferCreateInfo(m_buildScratchSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
			m_deviceScratchBuffer								= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
		}
		else
		{
			m_hostScratchBuffer.resize(static_cast<size_t>(m_buildScratchSize));
		}
	}

	// Optional buffer holding one address (device) or pointer (host) per instance.
	if (m_useArrayOfPointers)
	{
		const size_t				pointerSize = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) ? sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress) : sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
		const VkBufferCreateInfo	bufferCreateInfo = makeBufferCreateInfo(static_cast<VkDeviceSize>(m_bottomLevelInstances.size() * pointerSize), VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
		m_instanceAddressBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
	}

	if(!m_bottomLevelInstances.empty())
		m_instanceBuffer = de::MovePtr<BufferWithMemory>(createInstanceBuffer(vk, device, allocator, m_bottomLevelInstances, m_instanceData, m_tryCachedMemory));
}
2920 
updateInstanceMatrix(const DeviceInterface & vk,const VkDevice device,size_t instanceIndex,const VkTransformMatrixKHR & matrix)2921 void TopLevelAccelerationStructureKHR::updateInstanceMatrix (const DeviceInterface& vk, const VkDevice device, size_t instanceIndex, const VkTransformMatrixKHR& matrix)
2922 {
2923 	DE_ASSERT(instanceIndex < m_bottomLevelInstances.size());
2924 	DE_ASSERT(instanceIndex < m_instanceData.size());
2925 
2926 	const auto&		blas			= *m_bottomLevelInstances[instanceIndex];
2927 	auto&			instanceData	= m_instanceData[instanceIndex];
2928 	auto&			instancesAlloc	= m_instanceBuffer->getAllocation();
2929 	auto			bufferStart		= reinterpret_cast<deUint8*>(instancesAlloc.getHostPtr());
2930 	VkDeviceSize	bufferOffset	= sizeof(VkAccelerationStructureInstanceKHR) * instanceIndex;
2931 
2932 	instanceData.matrix = matrix;
2933 	updateSingleInstance(vk, device, blas, instanceData, bufferStart + bufferOffset, m_buildType, m_inactiveInstances);
2934 	flushMappedMemoryRange(vk, device, instancesAlloc.getMemory(), instancesAlloc.getOffset(), VK_WHOLE_SIZE);
2935 }
2936 
build(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,TopLevelAccelerationStructure * srcAccelerationStructure)2937 void TopLevelAccelerationStructureKHR::build (const DeviceInterface&			vk,
2938 											  const VkDevice					device,
2939 											  const VkCommandBuffer				cmdBuffer,
2940 											  TopLevelAccelerationStructure*	srcAccelerationStructure)
2941 {
2942 	DE_ASSERT(!m_bottomLevelInstances.empty());
2943 	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
2944 	DE_ASSERT(m_buildScratchSize != 0);
2945 
2946 	updateInstanceBuffer(vk, device, m_bottomLevelInstances, m_instanceData, m_instanceBuffer.get(), m_buildType, m_inactiveInstances);
2947 
2948 	VkAccelerationStructureGeometryKHR		accelerationStructureGeometryKHR;
2949 	const auto								accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
2950 	std::vector<deUint32>					maxPrimitiveCounts;
2951 	prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);
2952 
2953 	VkDeviceOrHostAddressKHR				scratchData										= (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2954 																							? makeDeviceOrHostAddressKHR(vk, device, m_deviceScratchBuffer->get(), 0)
2955 																							: makeDeviceOrHostAddressKHR(m_hostScratchBuffer.data());
2956 
2957 	VkAccelerationStructureKHR				srcStructure									= (srcAccelerationStructure != DE_NULL) ? *(srcAccelerationStructure->getPtr()) : DE_NULL;
2958 	VkBuildAccelerationStructureModeKHR		mode											= (srcAccelerationStructure != DE_NULL) ? VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR : VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
2959 
2960 	VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR		=
2961 	{
2962 		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,						//  VkStructureType										sType;
2963 		DE_NULL,																				//  const void*											pNext;
2964 		VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,											//  VkAccelerationStructureTypeKHR						type;
2965 		m_buildFlags,																			//  VkBuildAccelerationStructureFlagsKHR				flags;
2966 		mode,																					//  VkBuildAccelerationStructureModeKHR					mode;
2967 		srcStructure,																			//  VkAccelerationStructureKHR							srcAccelerationStructure;
2968 		m_accelerationStructureKHR.get(),														//  VkAccelerationStructureKHR							dstAccelerationStructure;
2969 		1u,																						//  deUint32											geometryCount;
2970 		(m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR),						//  const VkAccelerationStructureGeometryKHR*			pGeometries;
2971 		(m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr),					//  const VkAccelerationStructureGeometryKHR* const*	ppGeometries;
2972 		scratchData																				//  VkDeviceOrHostAddressKHR							scratchData;
2973 	};
2974 
2975 	const deUint32 primitiveCount = (m_buildWithoutPrimitives ? 0u : static_cast<deUint32>(m_bottomLevelInstances.size()));
2976 
2977 	VkAccelerationStructureBuildRangeInfoKHR accelerationStructureBuildRangeInfoKHR =
2978 	{
2979 		primitiveCount,	//  deUint32	primitiveCount;
2980 		0,				//  deUint32	primitiveOffset;
2981 		0,				//  deUint32	firstVertex;
2982 		0				//  deUint32	transformOffset;
2983 	};
2984 	VkAccelerationStructureBuildRangeInfoKHR* accelerationStructureBuildRangeInfoKHRPtr	= &accelerationStructureBuildRangeInfoKHR;
2985 
2986 	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2987 	{
2988 		if (m_indirectBuffer == DE_NULL)
2989 			vk.cmdBuildAccelerationStructuresKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
2990 		else
2991 		{
2992 			VkDeviceAddress	indirectDeviceAddress = getBufferDeviceAddress(vk, device, m_indirectBuffer, m_indirectBufferOffset);
2993 			deUint32*		pMaxPrimitiveCounts = maxPrimitiveCounts.data();
2994 			vk.cmdBuildAccelerationStructuresIndirectKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, &indirectDeviceAddress, &m_indirectBufferStride, &pMaxPrimitiveCounts);
2995 		}
2996 	}
2997 	else if (!m_deferredOperation)
2998 	{
2999 		VK_CHECK(vk.buildAccelerationStructuresKHR(device, DE_NULL, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr));
3000 	}
3001 	else
3002 	{
3003 		const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
3004 		const auto deferredOperation	= deferredOperationPtr.get();
3005 
3006 		VkResult result = vk.buildAccelerationStructuresKHR(device, deferredOperation, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
3007 
3008 		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
3009 
3010 		finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
3011 
3012 		accelerationStructureBuildGeometryInfoKHR.pNext = DE_NULL;
3013 	}
3014 
3015 	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3016 	{
3017 		const VkAccessFlags		accessMasks	= VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
3018 		const VkMemoryBarrier	memBarrier	= makeMemoryBarrier(accessMasks, accessMasks);
3019 
3020 		cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
3021 	}
3022 }
3023 
copyFrom(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,TopLevelAccelerationStructure * accelerationStructure,bool compactCopy)3024 void TopLevelAccelerationStructureKHR::copyFrom (const DeviceInterface&				vk,
3025 												 const VkDevice						device,
3026 												 const VkCommandBuffer				cmdBuffer,
3027 												 TopLevelAccelerationStructure*		accelerationStructure,
3028 												 bool								compactCopy)
3029 {
3030 	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
3031 	DE_ASSERT(accelerationStructure != DE_NULL);
3032 
3033 	VkCopyAccelerationStructureInfoKHR copyAccelerationStructureInfo =
3034 	{
3035 		VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR,															// VkStructureType						sType;
3036 		DE_NULL,																										// const void*							pNext;
3037 		*(accelerationStructure->getPtr()),																				// VkAccelerationStructureKHR			src;
3038 		*(getPtr()),																									// VkAccelerationStructureKHR			dst;
3039 		compactCopy ? VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR : VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR	// VkCopyAccelerationStructureModeKHR	mode;
3040 	};
3041 
3042 	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3043 	{
3044 		vk.cmdCopyAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
3045 	}
3046 	else if (!m_deferredOperation)
3047 	{
3048 		VK_CHECK(vk.copyAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
3049 	}
3050 	else
3051 	{
3052 		const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
3053 		const auto deferredOperation	= deferredOperationPtr.get();
3054 
3055 		VkResult result = vk.copyAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);
3056 
3057 		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
3058 
3059 		finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
3060 	}
3061 
3062 	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3063 	{
3064 		const VkAccessFlags		accessMasks	= VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
3065 		const VkMemoryBarrier	memBarrier	= makeMemoryBarrier(accessMasks, accessMasks);
3066 
3067 		cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
3068 	}
3069 
3070 }
3071 
serialize(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,SerialStorage * storage)3072 void TopLevelAccelerationStructureKHR::serialize (const DeviceInterface&	vk,
3073 												  const VkDevice			device,
3074 												  const VkCommandBuffer		cmdBuffer,
3075 												  SerialStorage*			storage)
3076 {
3077 	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
3078 	DE_ASSERT(storage != DE_NULL);
3079 
3080 	const VkCopyAccelerationStructureToMemoryInfoKHR	copyAccelerationStructureInfo	=
3081 	{
3082 		VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_INFO_KHR,	// VkStructureType						sType;
3083 		DE_NULL,															// const void*							pNext;
3084 		*(getPtr()),														// VkAccelerationStructureKHR			src;
3085 		storage->getAddress(vk, device, m_buildType),						// VkDeviceOrHostAddressKHR				dst;
3086 		VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR					// VkCopyAccelerationStructureModeKHR	mode;
3087 	};
3088 
3089 	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3090 	{
3091 		vk.cmdCopyAccelerationStructureToMemoryKHR(cmdBuffer, &copyAccelerationStructureInfo);
3092 		if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, DE_NULL);
3093 	}
3094 	else if (!m_deferredOperation)
3095 	{
3096 		VK_CHECK(vk.copyAccelerationStructureToMemoryKHR(device, DE_NULL, &copyAccelerationStructureInfo));
3097 		if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, DE_NULL);
3098 	}
3099 	else
3100 	{
3101 		const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
3102 		const auto deferredOperation	= deferredOperationPtr.get();
3103 
3104 		const VkResult result = vk.copyAccelerationStructureToMemoryKHR(device, deferredOperation, &copyAccelerationStructureInfo);
3105 
3106 		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
3107 		if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, deferredOperation);
3108 
3109 		finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
3110 	}
3111 }
3112 
deserialize(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,SerialStorage * storage)3113 void TopLevelAccelerationStructureKHR::deserialize (const DeviceInterface&	vk,
3114 													const VkDevice			device,
3115 													const VkCommandBuffer	cmdBuffer,
3116 													SerialStorage*			storage)
3117 {
3118 	DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
3119 	DE_ASSERT(storage != DE_NULL);
3120 
3121 	const VkCopyMemoryToAccelerationStructureInfoKHR	copyAccelerationStructureInfo	=
3122 	{
3123 		VK_STRUCTURE_TYPE_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_INFO_KHR,	// VkStructureType							sType;
3124 		DE_NULL,															// const void*								pNext;
3125 		storage->getAddressConst(vk, device, m_buildType),					// VkDeviceOrHostAddressConstKHR			src;
3126 		*(getPtr()),														// VkAccelerationStructureKHR				dst;
3127 		VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR					// VkCopyAccelerationStructureModeKHR		mode;
3128 	};
3129 
3130 	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3131 	{
3132 		vk.cmdCopyMemoryToAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
3133 	}
3134 	else if (!m_deferredOperation)
3135 	{
3136 		VK_CHECK(vk.copyMemoryToAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
3137 	}
3138 	else
3139 	{
3140 		const auto deferredOperationPtr	= createDeferredOperationKHR(vk, device);
3141 		const auto deferredOperation	= deferredOperationPtr.get();
3142 
3143 		const VkResult result = vk.copyMemoryToAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);
3144 
3145 		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
3146 
3147 		finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
3148 	}
3149 
3150 	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3151 	{
3152 		const VkAccessFlags		accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
3153 		const VkMemoryBarrier	memBarrier = makeMemoryBarrier(accessMasks, accessMasks);
3154 
3155 		cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
3156 	}
3157 }
3158 
serializeBottoms(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,SerialStorage * storage,VkDeferredOperationKHR deferredOperation)3159 void TopLevelAccelerationStructureKHR::serializeBottoms (const DeviceInterface&	vk,
3160 														 const VkDevice			device,
3161 														 const VkCommandBuffer	cmdBuffer,
3162 														 SerialStorage*			storage,
3163 														 VkDeferredOperationKHR	deferredOperation)
3164 {
3165 	DE_UNREF(deferredOperation);
3166 	DE_ASSERT(storage->hasDeepFormat());
3167 
3168 	const std::vector<deUint64>&	addresses		= storage->getSerialInfo().addresses();
3169 	const std::size_t				cbottoms		= m_bottomLevelInstances.size();
3170 
3171 	deUint32						storageIndex	= 0;
3172 	std::vector<deUint64>			matches;
3173 
3174 	for (std::size_t i = 0; i < cbottoms; ++i)
3175 	{
3176 		const deUint64& lookAddr	= addresses[i+1];
3177 		auto			end			= matches.end();
3178 		auto			match		= std::find_if(matches.begin(), end, [&](const deUint64& item){ return item == lookAddr; });
3179 		if (match == end)
3180 		{
3181 			matches.emplace_back(lookAddr);
3182 			m_bottomLevelInstances[i].get()->serialize(vk, device, cmdBuffer, storage->getBottomStorage(storageIndex).get());
3183 			storageIndex += 1;
3184 		}
3185 	}
3186 }
3187 
// Recreate and deserialize every bottom-level AS referenced by a "deep"
// serialized top-level AS. Instances whose serialized BLAS address repeats an
// earlier one share that earlier structure instead of being deserialized
// again. Afterwards the serialized top-level header is patched with the newly
// created BLAS addresses so the top-level AS itself can be deserialized
// against this device.
void TopLevelAccelerationStructureKHR::createAndDeserializeBottoms (const DeviceInterface&	vk,
																	const VkDevice			device,
																	const VkCommandBuffer	cmdBuffer,
																	Allocator&				allocator,
																	SerialStorage*			storage)
{
	DE_ASSERT(storage->hasDeepFormat());
	DE_ASSERT(m_bottomLevelInstances.size() == 0);

	// addresses[0] is the top-level AS itself; bottom-level addresses follow.
	const std::vector<deUint64>&					addresses		= storage->getSerialInfo().addresses();
	const std::size_t								cbottoms		= addresses.size() - 1;
	deUint32										storageIndex	= 0;
	// (serialized address, instance index) pairs of BLASes created so far.
	std::vector<std::pair<deUint64, std::size_t>>	matches;

	for (std::size_t i = 0; i < cbottoms; ++i)
	{
		const deUint64& lookAddr	= addresses[i+1];
		auto			end			= matches.end();
		auto			match		= std::find_if(matches.begin(), end, [&](const std::pair<deUint64, std::size_t>& item){ return item.first == lookAddr; });
		if (match != end)
		{
			// Same BLAS as an earlier instance: share it instead of recreating.
			m_bottomLevelInstances .emplace_back(m_bottomLevelInstances[match->second]);
		}
		else
		{
			// First occurrence: build a fresh BLAS from the next bottom-storage slot.
			de::MovePtr<BottomLevelAccelerationStructure> blas = makeBottomLevelAccelerationStructure();
			blas->createAndDeserializeFrom(vk, device, cmdBuffer, allocator, storage->getBottomStorage(storageIndex).get());
			m_bottomLevelInstances.emplace_back(de::SharedPtr<BottomLevelAccelerationStructure>(blas.release()));
			matches.emplace_back(lookAddr, i);
			storageIndex += 1;
		}
	}

	// Addresses of the structures just created on this device.
	std::vector<deUint64>						newAddresses	= getSerializingAddresses(vk, device);
	DE_ASSERT(addresses.size() == newAddresses.size());

	SerialStorage::AccelerationStructureHeader* header			= storage->getASHeader();
	DE_ASSERT(cbottoms ==header->handleCount);

	// finally update bottom-level AS addresses before top-level AS deserialization
	for (std::size_t i = 0; i < cbottoms; ++i)
	{
		header->handleArray[i] = newAddresses[i+1];
	}
}
3233 
getSerializingSizes(const DeviceInterface & vk,const VkDevice device,const VkQueue queue,const deUint32 queueFamilyIndex)3234 std::vector<VkDeviceSize> TopLevelAccelerationStructureKHR::getSerializingSizes (const DeviceInterface&	vk,
3235 																				 const VkDevice			device,
3236 																				 const VkQueue			queue,
3237 																				 const deUint32			queueFamilyIndex)
3238 {
3239 	const deUint32							queryCount(deUint32(m_bottomLevelInstances.size()) + 1);
3240 	std::vector<VkAccelerationStructureKHR>	handles(queryCount);
3241 	std::vector<VkDeviceSize>				sizes(queryCount);
3242 
3243 	handles[0] = m_accelerationStructureKHR.get();
3244 
3245 	for (deUint32 h = 1; h < queryCount; ++h)
3246 		handles[h] = *m_bottomLevelInstances[h-1].get()->getPtr();
3247 
3248 	if (VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR == m_buildType)
3249 		queryAccelerationStructureSize(vk, device, DE_NULL, handles, m_buildType, DE_NULL, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, sizes);
3250 	else
3251 	{
3252 		const Move<VkCommandPool>	cmdPool		= createCommandPool(vk, device, 0, queueFamilyIndex);
3253 		const Move<VkCommandBuffer>	cmdBuffer	= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3254 		const Move<VkQueryPool>		queryPool	= makeQueryPool(vk, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, queryCount);
3255 
3256 		beginCommandBuffer(vk, *cmdBuffer);
3257 		queryAccelerationStructureSize(vk, device, *cmdBuffer, handles, m_buildType, *queryPool, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, sizes);
3258 		endCommandBuffer(vk, *cmdBuffer);
3259 		submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
3260 
3261 		VK_CHECK(vk.getQueryPoolResults(device, *queryPool, 0u, queryCount, queryCount * sizeof(VkDeviceSize), sizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
3262 	}
3263 
3264 	return sizes;
3265 }
3266 
getSerializingAddresses(const DeviceInterface & vk,const VkDevice device) const3267 std::vector<deUint64> TopLevelAccelerationStructureKHR::getSerializingAddresses (const DeviceInterface& vk, const VkDevice device) const
3268 {
3269 	std::vector<deUint64> result(m_bottomLevelInstances.size() + 1);
3270 
3271 	VkAccelerationStructureDeviceAddressInfoKHR asDeviceAddressInfo =
3272 	{
3273 		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,	// VkStructureType				sType;
3274 		DE_NULL,															// const void*					pNext;
3275 		DE_NULL																// VkAccelerationStructureKHR	accelerationStructure;
3276 	};
3277 
3278 	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3279 	{
3280 		asDeviceAddressInfo.accelerationStructure = m_accelerationStructureKHR.get();
3281 		result[0] = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
3282 	}
3283 	else
3284 	{
3285 		result[0] = deUint64(getPtr()->getInternal());
3286 	}
3287 
3288 	for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
3289 	{
3290 		const BottomLevelAccelerationStructure&		bottomLevelAccelerationStructure	= *m_bottomLevelInstances[instanceNdx];
3291 		const VkAccelerationStructureKHR			accelerationStructureKHR			= *bottomLevelAccelerationStructure.getPtr();
3292 
3293 		if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3294 		{
3295 			asDeviceAddressInfo.accelerationStructure = accelerationStructureKHR;
3296 			result[instanceNdx+1] = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
3297 		}
3298 		else
3299 		{
3300 			result[instanceNdx+1] = deUint64(accelerationStructureKHR.getInternal());
3301 		}
3302 	}
3303 
3304 	return result;
3305 }
3306 
getPtr(void) const3307 const VkAccelerationStructureKHR* TopLevelAccelerationStructureKHR::getPtr (void) const
3308 {
3309 	return &m_accelerationStructureKHR.get();
3310 }
3311 
// Fill 'accelerationStructureGeometryKHR' with a VK_GEOMETRY_TYPE_INSTANCES_KHR
// geometry that references this TLAS's instance buffer, and set
// maxPrimitiveCounts[0] to the number of bottom-level instances. Handles both
// device and host builds, and the optional array-of-pointers instance layout.
void TopLevelAccelerationStructureKHR::prepareInstances (const DeviceInterface&							vk,
														 const VkDevice									device,
														 VkAccelerationStructureGeometryKHR&			accelerationStructureGeometryKHR,
														 std::vector<deUint32>&							maxPrimitiveCounts)
{
	maxPrimitiveCounts.resize(1);
	maxPrimitiveCounts[0] = static_cast<deUint32>(m_bottomLevelInstances.size());

	VkDeviceOrHostAddressConstKHR							instancesData;
	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	{
		if(m_instanceBuffer.get() != DE_NULL)
		{
			if (m_useArrayOfPointers)
			{
				// Write the device address of each instance into the address buffer,
				// then point the geometry at that buffer of per-instance addresses.
				deUint8*						bufferStart			= static_cast<deUint8*>(m_instanceAddressBuffer->getAllocation().getHostPtr());
				VkDeviceSize					bufferOffset		= 0;
				VkDeviceOrHostAddressConstKHR	firstInstance		= makeDeviceOrHostAddressConstKHR(vk, device, m_instanceBuffer->get(), 0);
				for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
				{
					VkDeviceOrHostAddressConstKHR	currentInstance;
					currentInstance.deviceAddress	= firstInstance.deviceAddress + instanceNdx * sizeof(VkAccelerationStructureInstanceKHR);

					// Only the address member is copied, hence sizeof the member, not the union.
					deMemcpy(&bufferStart[bufferOffset], &currentInstance, sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress));
					bufferOffset += sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress);
				}
				flushMappedMemoryRange(vk, device, m_instanceAddressBuffer->getAllocation().getMemory(), m_instanceAddressBuffer->getAllocation().getOffset(), VK_WHOLE_SIZE);

				instancesData = makeDeviceOrHostAddressConstKHR(vk, device, m_instanceAddressBuffer->get(), 0);
			}
			else
				instancesData = makeDeviceOrHostAddressConstKHR(vk, device, m_instanceBuffer->get(), 0);
		}
		else
			instancesData = makeDeviceOrHostAddressConstKHR(DE_NULL);
	}
	else
	{
		if (m_instanceBuffer.get() != DE_NULL)
		{
			if (m_useArrayOfPointers)
			{
				// Host build: store host pointers to each instance rather than device addresses.
				deUint8*						bufferStart			= static_cast<deUint8*>(m_instanceAddressBuffer->getAllocation().getHostPtr());
				VkDeviceSize					bufferOffset		= 0;
				for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
				{
					VkDeviceOrHostAddressConstKHR	currentInstance;
					currentInstance.hostAddress	= (deUint8*)m_instanceBuffer->getAllocation().getHostPtr() + instanceNdx * sizeof(VkAccelerationStructureInstanceKHR);

					deMemcpy(&bufferStart[bufferOffset], &currentInstance, sizeof(VkDeviceOrHostAddressConstKHR::hostAddress));
					bufferOffset += sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
				}
				instancesData = makeDeviceOrHostAddressConstKHR(m_instanceAddressBuffer->getAllocation().getHostPtr());
			}
			else
				instancesData = makeDeviceOrHostAddressConstKHR(m_instanceBuffer->getAllocation().getHostPtr());
		}
		else
			instancesData = makeDeviceOrHostAddressConstKHR(DE_NULL);
	}

	VkAccelerationStructureGeometryInstancesDataKHR accelerationStructureGeometryInstancesDataKHR	=
	{
		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR,	//  VkStructureType					sType;
		DE_NULL,																//  const void*						pNext;
		(VkBool32)( m_useArrayOfPointers ? DE_TRUE : DE_FALSE ),				//  VkBool32						arrayOfPointers;
		instancesData															//  VkDeviceOrHostAddressConstKHR	data;
	};

	accelerationStructureGeometryKHR					=
	{
		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,										//  VkStructureType							sType;
		DE_NULL,																					//  const void*								pNext;
		VK_GEOMETRY_TYPE_INSTANCES_KHR,																//  VkGeometryTypeKHR						geometryType;
		makeVkAccelerationStructureInstancesDataKHR(accelerationStructureGeometryInstancesDataKHR),	//  VkAccelerationStructureGeometryDataKHR	geometry;
		(VkGeometryFlagsKHR)0u																		//  VkGeometryFlagsKHR						flags;
	};
}
3390 
// Number of device-memory allocations a top-level AS may require; delegates to
// the KHR implementation, currently the only backend.
deUint32 TopLevelAccelerationStructure::getRequiredAllocationCount (void)
{
	return TopLevelAccelerationStructureKHR::getRequiredAllocationCount();
}
3395 
makeTopLevelAccelerationStructure()3396 de::MovePtr<TopLevelAccelerationStructure> makeTopLevelAccelerationStructure ()
3397 {
3398 	return de::MovePtr<TopLevelAccelerationStructure>(new TopLevelAccelerationStructureKHR);
3399 }
3400 
queryAccelerationStructureSizeKHR(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,const std::vector<VkAccelerationStructureKHR> & accelerationStructureHandles,VkAccelerationStructureBuildTypeKHR buildType,const VkQueryPool queryPool,VkQueryType queryType,deUint32 firstQuery,std::vector<VkDeviceSize> & results)3401 bool queryAccelerationStructureSizeKHR (const DeviceInterface&							vk,
3402 										const VkDevice									device,
3403 										const VkCommandBuffer							cmdBuffer,
3404 										const std::vector<VkAccelerationStructureKHR>&	accelerationStructureHandles,
3405 										VkAccelerationStructureBuildTypeKHR				buildType,
3406 										const VkQueryPool								queryPool,
3407 										VkQueryType										queryType,
3408 										deUint32										firstQuery,
3409 										std::vector<VkDeviceSize>&						results)
3410 {
3411 	DE_ASSERT(queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR || queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR);
3412 
3413 	if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3414 	{
3415 		// queryPool must be large enough to contain at least (firstQuery + accelerationStructureHandles.size()) queries
3416 		vk.cmdResetQueryPool(cmdBuffer, queryPool, firstQuery, deUint32(accelerationStructureHandles.size()));
3417 		vk.cmdWriteAccelerationStructuresPropertiesKHR(cmdBuffer, deUint32(accelerationStructureHandles.size()), accelerationStructureHandles.data(), queryType, queryPool, firstQuery);
3418 		// results cannot be retrieved to CPU at the moment - you need to do it using getQueryPoolResults after cmdBuffer is executed. Meanwhile function returns a vector of 0s.
3419 		results.resize(accelerationStructureHandles.size(), 0u);
3420 		return false;
3421 	}
3422 	// buildType != VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR
3423 	results.resize(accelerationStructureHandles.size(), 0u);
3424 	vk.writeAccelerationStructuresPropertiesKHR(device, deUint32(accelerationStructureHandles.size()), accelerationStructureHandles.data(), queryType,
3425 												sizeof(VkDeviceSize) * accelerationStructureHandles.size(), results.data(), sizeof(VkDeviceSize));
3426 	// results will contain proper values
3427 	return true;
3428 }
3429 
// Thin forwarding wrapper around queryAccelerationStructureSizeKHR; see that
// function for the host/device semantics and the meaning of the return value.
bool queryAccelerationStructureSize (const DeviceInterface&							vk,
									 const VkDevice									device,
									 const VkCommandBuffer							cmdBuffer,
									 const std::vector<VkAccelerationStructureKHR>&	accelerationStructureHandles,
									 VkAccelerationStructureBuildTypeKHR			buildType,
									 const VkQueryPool								queryPool,
									 VkQueryType									queryType,
									 deUint32										firstQuery,
									 std::vector<VkDeviceSize>&						results)
{
	return queryAccelerationStructureSizeKHR(vk, device, cmdBuffer, accelerationStructureHandles, buildType, queryPool, queryType, firstQuery, results);
}
3442 
RayTracingPipeline()3443 RayTracingPipeline::RayTracingPipeline ()
3444 	: m_shadersModules			()
3445 	, m_pipelineLibraries		()
3446 	, m_shaderCreateInfos		()
3447 	, m_shadersGroupCreateInfos	()
3448 	, m_pipelineCreateFlags		(0U)
3449 	, m_pipelineCreateFlags2	(0U)
3450 	, m_maxRecursionDepth		(1U)
3451 	, m_maxPayloadSize			(0U)
3452 	, m_maxAttributeSize		(0U)
3453 	, m_deferredOperation		(false)
3454 	, m_workerThreadCount		(0)
3455 {
3456 }
3457 
~RayTracingPipeline()3458 RayTracingPipeline::~RayTracingPipeline ()
3459 {
3460 }
3461 
// Assign STAGE to SHADER only when the slot is still unset; assigning twice to
// the same group slot is a test-code error and throws InternalError.
#define CHECKED_ASSIGN_SHADER(SHADER, STAGE)						\
	if (SHADER == VK_SHADER_UNUSED_KHR)								\
		SHADER = STAGE;												\
	else															\
		TCU_THROW(InternalError, "Attempt to reassign shader")
3467 
addShader(VkShaderStageFlagBits shaderStage,Move<VkShaderModule> shaderModule,deUint32 group,const VkSpecializationInfo * specializationInfo,const VkPipelineShaderStageCreateFlags pipelineShaderStageCreateFlags,const void * pipelineShaderStageCreateInfopNext)3468 void RayTracingPipeline::addShader (VkShaderStageFlagBits					shaderStage,
3469 									Move<VkShaderModule>					shaderModule,
3470 									deUint32								group,
3471 									const VkSpecializationInfo*				specializationInfo,
3472 									const VkPipelineShaderStageCreateFlags	pipelineShaderStageCreateFlags,
3473 									const void*								pipelineShaderStageCreateInfopNext)
3474 {
3475 	addShader(shaderStage, makeVkSharedPtr(shaderModule), group, specializationInfo, pipelineShaderStageCreateFlags, pipelineShaderStageCreateInfopNext);
3476 }
3477 
addShader(VkShaderStageFlagBits shaderStage,de::SharedPtr<Move<VkShaderModule>> shaderModule,deUint32 group,const VkSpecializationInfo * specializationInfoPtr,const VkPipelineShaderStageCreateFlags pipelineShaderStageCreateFlags,const void * pipelineShaderStageCreateInfopNext)3478 void RayTracingPipeline::addShader (VkShaderStageFlagBits					shaderStage,
3479 									de::SharedPtr<Move<VkShaderModule>>		shaderModule,
3480 									deUint32								group,
3481 									const VkSpecializationInfo*				specializationInfoPtr,
3482 									const VkPipelineShaderStageCreateFlags	pipelineShaderStageCreateFlags,
3483 									const void*								pipelineShaderStageCreateInfopNext)
3484 {
3485 	addShader(shaderStage, **shaderModule, group, specializationInfoPtr, pipelineShaderStageCreateFlags, pipelineShaderStageCreateInfopNext);
3486 	m_shadersModules.push_back(shaderModule);
3487 }
3488 
// Core addShader overload: appends a stage create info for 'shaderModule' and
// wires the new stage's index into the shader-group create info for 'group'.
// Groups are created on demand (with all shader slots unused) up to and
// including the requested group index. Throws InternalError on an unsupported
// stage or when a group slot is assigned twice.
void RayTracingPipeline::addShader (VkShaderStageFlagBits					shaderStage,
									VkShaderModule							shaderModule,
									deUint32								group,
									const VkSpecializationInfo*				specializationInfoPtr,
									const VkPipelineShaderStageCreateFlags	pipelineShaderStageCreateFlags,
									const void*								pipelineShaderStageCreateInfopNext)
{
	if (group >= m_shadersGroupCreateInfos.size())
	{
		// Grow the group array with placeholder entries; a group's type is
		// resolved below when its first shader is assigned.
		for (size_t groupNdx = m_shadersGroupCreateInfos.size(); groupNdx <= group; ++groupNdx)
		{
			VkRayTracingShaderGroupCreateInfoKHR	shaderGroupCreateInfo	=
			{
				VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR,	//  VkStructureType					sType;
				DE_NULL,													//  const void*						pNext;
				VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR,				//  VkRayTracingShaderGroupTypeKHR	type;
				VK_SHADER_UNUSED_KHR,										//  deUint32						generalShader;
				VK_SHADER_UNUSED_KHR,										//  deUint32						closestHitShader;
				VK_SHADER_UNUSED_KHR,										//  deUint32						anyHitShader;
				VK_SHADER_UNUSED_KHR,										//  deUint32						intersectionShader;
				DE_NULL,													//  const void*						pShaderGroupCaptureReplayHandle;
			};

			m_shadersGroupCreateInfos.push_back(shaderGroupCreateInfo);
		}
	}

	// The new stage's index is its position in m_shaderCreateInfos (pushed at the end).
	const deUint32							shaderStageNdx			= (deUint32)m_shaderCreateInfos.size();
	VkRayTracingShaderGroupCreateInfoKHR&	shaderGroupCreateInfo	= m_shadersGroupCreateInfos[group];

	// Record the stage index in the matching group slot; throws if the slot is taken.
	switch (shaderStage)
	{
		case VK_SHADER_STAGE_RAYGEN_BIT_KHR:		CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader,		shaderStageNdx);	break;
		case VK_SHADER_STAGE_MISS_BIT_KHR:			CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader,		shaderStageNdx);	break;
		case VK_SHADER_STAGE_CALLABLE_BIT_KHR:		CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader,		shaderStageNdx);	break;
		case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:		CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.anyHitShader,		shaderStageNdx);	break;
		case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:	CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.closestHitShader,	shaderStageNdx);	break;
		case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:	CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.intersectionShader,	shaderStageNdx);	break;
		default:									TCU_THROW(InternalError, "Unacceptable stage");
	}

	// Derive (or validate) the group type from the kind of stage just added.
	switch (shaderStage)
	{
		case VK_SHADER_STAGE_RAYGEN_BIT_KHR:
		case VK_SHADER_STAGE_MISS_BIT_KHR:
		case VK_SHADER_STAGE_CALLABLE_BIT_KHR:
		{
			// A general stage may only land in a group that has no type yet.
			DE_ASSERT(shaderGroupCreateInfo.type == VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR);
			shaderGroupCreateInfo.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR;

			break;
		}

		case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:
		case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:
		case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:
		{
			// Hit group type depends on whether an intersection shader is present.
			DE_ASSERT(shaderGroupCreateInfo.type != VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR);
			shaderGroupCreateInfo.type	= (shaderGroupCreateInfo.intersectionShader == VK_SHADER_UNUSED_KHR)
										? VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR
										: VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR;

			break;
		}

		default: TCU_THROW(InternalError, "Unacceptable stage");
	}

	{
		const VkPipelineShaderStageCreateInfo	shaderCreateInfo	=
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	//  VkStructureType						sType;
			pipelineShaderStageCreateInfopNext,						//  const void*							pNext;
			pipelineShaderStageCreateFlags,							//  VkPipelineShaderStageCreateFlags	flags;
			shaderStage,											//  VkShaderStageFlagBits				stage;
			shaderModule,											//  VkShaderModule						module;
			"main",													//  const char*							pName;
			specializationInfoPtr,									//  const VkSpecializationInfo*			pSpecializationInfo;
		};

		m_shaderCreateInfos.push_back(shaderCreateInfo);
	}
}
3572 
setGroupCaptureReplayHandle(uint32_t group,const void * pShaderGroupCaptureReplayHandle)3573 void RayTracingPipeline::setGroupCaptureReplayHandle (uint32_t group, const void* pShaderGroupCaptureReplayHandle)
3574 {
3575 	DE_ASSERT(static_cast<size_t>(group) < m_shadersGroupCreateInfos.size());
3576 	m_shadersGroupCreateInfos[group].pShaderGroupCaptureReplayHandle = pShaderGroupCaptureReplayHandle;
3577 }
3578 
addLibrary(de::SharedPtr<de::MovePtr<RayTracingPipeline>> pipelineLibrary)3579 void RayTracingPipeline::addLibrary (de::SharedPtr<de::MovePtr<RayTracingPipeline>> pipelineLibrary)
3580 {
3581 	m_pipelineLibraries.push_back(pipelineLibrary);
3582 }
3583 
getShaderGroupCount(void)3584 uint32_t RayTracingPipeline::getShaderGroupCount (void)
3585 {
3586 	return de::sizeU32(m_shadersGroupCreateInfos);
3587 }
3588 
getFullShaderGroupCount(void)3589 uint32_t RayTracingPipeline::getFullShaderGroupCount (void)
3590 {
3591 	uint32_t totalCount = getShaderGroupCount();
3592 
3593 	for (const auto& lib : m_pipelineLibraries)
3594 		totalCount += lib->get()->getFullShaderGroupCount();
3595 
3596 	return totalCount;
3597 }
3598 
createPipelineKHR(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const std::vector<VkPipeline> & pipelineLibraries,const VkPipelineCache pipelineCache)3599 Move<VkPipeline> RayTracingPipeline::createPipelineKHR (const DeviceInterface&			vk,
3600 														const VkDevice					device,
3601 														const VkPipelineLayout			pipelineLayout,
3602 														const std::vector<VkPipeline>&	pipelineLibraries,
3603 														const VkPipelineCache			pipelineCache)
3604 {
3605 	for (size_t groupNdx = 0; groupNdx < m_shadersGroupCreateInfos.size(); ++groupNdx)
3606 		DE_ASSERT(m_shadersGroupCreateInfos[groupNdx].sType == VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR);
3607 
3608 	VkPipelineLibraryCreateInfoKHR				librariesCreateInfo	=
3609 	{
3610 		VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR,	//  VkStructureType	sType;
3611 		DE_NULL,											//  const void*		pNext;
3612 		de::sizeU32(pipelineLibraries),						//  deUint32		libraryCount;
3613 		de::dataOrNull(pipelineLibraries)					//  VkPipeline*		pLibraries;
3614 	};
3615 	const VkRayTracingPipelineInterfaceCreateInfoKHR	pipelineInterfaceCreateInfo		=
3616 	{
3617 		VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_INTERFACE_CREATE_INFO_KHR,	//  VkStructureType	sType;
3618 		DE_NULL,															//  const void*		pNext;
3619 		m_maxPayloadSize,													//  deUint32		maxPayloadSize;
3620 		m_maxAttributeSize													//  deUint32		maxAttributeSize;
3621 	};
3622 	const bool											addPipelineInterfaceCreateInfo	= m_maxPayloadSize != 0 || m_maxAttributeSize != 0;
3623 	const VkRayTracingPipelineInterfaceCreateInfoKHR*	pipelineInterfaceCreateInfoPtr	= addPipelineInterfaceCreateInfo ? &pipelineInterfaceCreateInfo : DE_NULL;
3624 	const VkPipelineLibraryCreateInfoKHR*				librariesCreateInfoPtr			= (pipelineLibraries.empty() ? nullptr : &librariesCreateInfo);
3625 
3626 	Move<VkDeferredOperationKHR>						deferredOperation;
3627 	if (m_deferredOperation)
3628 		deferredOperation = createDeferredOperationKHR(vk, device);
3629 
3630 	VkPipelineDynamicStateCreateInfo dynamicStateCreateInfo	=
3631 	{
3632 		VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,	// VkStructureType						sType;
3633 		DE_NULL,												// const void*							pNext;
3634 		0,														// VkPipelineDynamicStateCreateFlags	flags;
3635 		static_cast<deUint32>(m_dynamicStates.size() ),			// deUint32								dynamicStateCount;
3636 		m_dynamicStates.data(),									// const VkDynamicState*				pDynamicStates;
3637 	};
3638 
3639 	VkRayTracingPipelineCreateInfoKHR					pipelineCreateInfo
3640 	{
3641 		VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR,	//  VkStructureType								sType;
3642 		DE_NULL,												//  const void*									pNext;
3643 		m_pipelineCreateFlags,									//  VkPipelineCreateFlags						flags;
3644 		de::sizeU32(m_shaderCreateInfos),						//  deUint32									stageCount;
3645 		de::dataOrNull(m_shaderCreateInfos),					//  const VkPipelineShaderStageCreateInfo*		pStages;
3646 		de::sizeU32(m_shadersGroupCreateInfos),					//  deUint32									groupCount;
3647 		de::dataOrNull(m_shadersGroupCreateInfos),				//  const VkRayTracingShaderGroupCreateInfoKHR*	pGroups;
3648 		m_maxRecursionDepth,									//  deUint32									maxRecursionDepth;
3649 		librariesCreateInfoPtr,									//  VkPipelineLibraryCreateInfoKHR*				pLibraryInfo;
3650 		pipelineInterfaceCreateInfoPtr,							//  VkRayTracingPipelineInterfaceCreateInfoKHR*	pLibraryInterface;
3651 		&dynamicStateCreateInfo,								//  const VkPipelineDynamicStateCreateInfo*		pDynamicState;
3652 		pipelineLayout,											//  VkPipelineLayout							layout;
3653 		(VkPipeline)DE_NULL,									//  VkPipeline									basePipelineHandle;
3654 		0,														//  deInt32										basePipelineIndex;
3655 	};
3656 	VkPipeline											object							= DE_NULL;
3657 	VkResult											result							= vk.createRayTracingPipelinesKHR(device, deferredOperation.get(), pipelineCache, 1u, &pipelineCreateInfo, DE_NULL, &object);
3658 	const bool											allowCompileRequired			= ((m_pipelineCreateFlags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) != 0);
3659 
3660 	VkPipelineCreateFlags2CreateInfoKHR					pipelineFlags2CreateInfo		= initVulkanStructure();
3661 	if (m_pipelineCreateFlags2)
3662 	{
3663 		pipelineFlags2CreateInfo.flags	= m_pipelineCreateFlags2;
3664 		pipelineCreateInfo.pNext		= &pipelineFlags2CreateInfo;
3665 		pipelineCreateInfo.flags		= 0;
3666 	}
3667 
3668 	if (m_deferredOperation)
3669 	{
3670 		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS || (allowCompileRequired && result == VK_PIPELINE_COMPILE_REQUIRED));
3671 		finishDeferredOperation(vk, device, deferredOperation.get(), m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
3672 	}
3673 
3674 	if (allowCompileRequired && result == VK_PIPELINE_COMPILE_REQUIRED)
3675 		throw CompileRequiredError("createRayTracingPipelinesKHR returned VK_PIPELINE_COMPILE_REQUIRED");
3676 
3677 	Move<VkPipeline> pipeline (check<VkPipeline>(object), Deleter<VkPipeline>(vk, device, DE_NULL));
3678 	return pipeline;
3679 }
3680 
3681 
createPipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const std::vector<de::SharedPtr<Move<VkPipeline>>> & pipelineLibraries)3682 Move<VkPipeline> RayTracingPipeline::createPipeline (const DeviceInterface&									vk,
3683 													 const VkDevice											device,
3684 													 const VkPipelineLayout									pipelineLayout,
3685 													 const std::vector<de::SharedPtr<Move<VkPipeline>>>&	pipelineLibraries)
3686 {
3687 	std::vector<VkPipeline> rawPipelines;
3688 	rawPipelines.reserve(pipelineLibraries.size());
3689 	for (const auto& lib : pipelineLibraries)
3690 		rawPipelines.push_back(lib.get()->get());
3691 
3692 	return createPipelineKHR(vk, device, pipelineLayout, rawPipelines);
3693 }
3694 
createPipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const std::vector<VkPipeline> & pipelineLibraries,const VkPipelineCache pipelineCache)3695 Move<VkPipeline> RayTracingPipeline::createPipeline (const DeviceInterface&			vk,
3696 													 const VkDevice					device,
3697 													 const VkPipelineLayout			pipelineLayout,
3698 													 const std::vector<VkPipeline>&	pipelineLibraries,
3699 													 const VkPipelineCache			pipelineCache)
3700 {
3701 	return createPipelineKHR(vk, device, pipelineLayout, pipelineLibraries, pipelineCache);
3702 }
3703 
createPipelineWithLibraries(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout)3704 std::vector<de::SharedPtr<Move<VkPipeline>>> RayTracingPipeline::createPipelineWithLibraries (const DeviceInterface&			vk,
3705 																								const VkDevice					device,
3706 																								const VkPipelineLayout			pipelineLayout)
3707 {
3708 	for (size_t groupNdx = 0; groupNdx < m_shadersGroupCreateInfos.size(); ++groupNdx)
3709 		DE_ASSERT(m_shadersGroupCreateInfos[groupNdx].sType == VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR);
3710 
3711 	DE_ASSERT(m_shaderCreateInfos.size() > 0);
3712 	DE_ASSERT(m_shadersGroupCreateInfos.size() > 0);
3713 
3714 	std::vector<de::SharedPtr<Move<VkPipeline>>> result, allLibraries, firstLibraries;
3715 	for(auto it=begin(m_pipelineLibraries), eit=end(m_pipelineLibraries); it!=eit; ++it)
3716 	{
3717 		auto childLibraries = (*it)->get()->createPipelineWithLibraries(vk, device, pipelineLayout);
3718 		DE_ASSERT(childLibraries.size() > 0);
3719 		firstLibraries.push_back(childLibraries[0]);
3720 		std::copy(begin(childLibraries), end(childLibraries), std::back_inserter(allLibraries));
3721 	}
3722 	result.push_back(makeVkSharedPtr(createPipeline(vk, device, pipelineLayout, firstLibraries)));
3723 	std::copy(begin(allLibraries), end(allLibraries), std::back_inserter(result));
3724 	return result;
3725 }
3726 
getShaderGroupHandles(const DeviceInterface & vk,const VkDevice device,const VkPipeline pipeline,const deUint32 shaderGroupHandleSize,const deUint32 firstGroup,const deUint32 groupCount) const3727 std::vector<uint8_t> RayTracingPipeline::getShaderGroupHandles (const DeviceInterface&		vk,
3728 																const VkDevice				device,
3729 																const VkPipeline			pipeline,
3730 																const deUint32				shaderGroupHandleSize,
3731 																const deUint32				firstGroup,
3732 																const deUint32				groupCount) const
3733 {
3734 	const auto				handleArraySizeBytes	= groupCount * shaderGroupHandleSize;
3735 	std::vector<uint8_t>	shaderHandles			(handleArraySizeBytes);
3736 
3737 	VK_CHECK(getRayTracingShaderGroupHandles(vk, device, pipeline,
3738 											 firstGroup, groupCount,
3739 											 static_cast<uintptr_t>(shaderHandles.size()), de::dataOrNull(shaderHandles)));
3740 
3741 	return shaderHandles;
3742 }
3743 
getShaderGroupReplayHandles(const DeviceInterface & vk,const VkDevice device,const VkPipeline pipeline,const deUint32 shaderGroupHandleReplaySize,const deUint32 firstGroup,const deUint32 groupCount) const3744 std::vector<uint8_t> RayTracingPipeline::getShaderGroupReplayHandles (const DeviceInterface &vk,
3745 																	  const VkDevice device,
3746 																	  const VkPipeline pipeline,
3747 																	  const deUint32 shaderGroupHandleReplaySize,
3748 																	  const deUint32 firstGroup,
3749 																	  const deUint32 groupCount) const
3750 {
3751 	const auto				handleArraySizeBytes	= groupCount * shaderGroupHandleReplaySize;
3752 	std::vector<uint8_t>	shaderHandles			(handleArraySizeBytes);
3753 
3754 	VK_CHECK(getRayTracingCaptureReplayShaderGroupHandles(vk, device, pipeline,
3755 														  firstGroup, groupCount,
3756 														  static_cast<uintptr_t>(shaderHandles.size()), de::dataOrNull(shaderHandles)));
3757 
3758 	return shaderHandles;
3759 }
3760 
createShaderBindingTable(const DeviceInterface & vk,const VkDevice device,const VkPipeline pipeline,Allocator & allocator,const deUint32 & shaderGroupHandleSize,const deUint32 shaderGroupBaseAlignment,const deUint32 & firstGroup,const deUint32 & groupCount,const VkBufferCreateFlags & additionalBufferCreateFlags,const VkBufferUsageFlags & additionalBufferUsageFlags,const MemoryRequirement & additionalMemoryRequirement,const VkDeviceAddress & opaqueCaptureAddress,const deUint32 shaderBindingTableOffset,const deUint32 shaderRecordSize,const void ** shaderGroupDataPtrPerGroup,const bool autoAlignRecords)3761 de::MovePtr<BufferWithMemory> RayTracingPipeline::createShaderBindingTable	(const DeviceInterface&			vk,
3762 																			 const VkDevice					device,
3763 																			 const VkPipeline				pipeline,
3764 																			 Allocator&						allocator,
3765 																			 const deUint32&				shaderGroupHandleSize,
3766 																			 const deUint32					shaderGroupBaseAlignment,
3767 																			 const deUint32&				firstGroup,
3768 																			 const deUint32&				groupCount,
3769 																			 const VkBufferCreateFlags&		additionalBufferCreateFlags,
3770 																			 const VkBufferUsageFlags&		additionalBufferUsageFlags,
3771 																			 const MemoryRequirement&		additionalMemoryRequirement,
3772 																			 const VkDeviceAddress&			opaqueCaptureAddress,
3773 																			 const deUint32					shaderBindingTableOffset,
3774 																			 const deUint32					shaderRecordSize,
3775 																			 const void**					shaderGroupDataPtrPerGroup,
3776 																			 const bool						autoAlignRecords)
3777 {
3778 	const auto shaderHandles = getShaderGroupHandles(vk, device, pipeline, shaderGroupHandleSize, firstGroup, groupCount);
3779 	return createShaderBindingTable(vk, device, allocator,
3780 									shaderGroupHandleSize, shaderGroupBaseAlignment, shaderHandles,
3781 									additionalBufferCreateFlags, additionalBufferUsageFlags, additionalMemoryRequirement,
3782 									opaqueCaptureAddress,
3783 									shaderBindingTableOffset, shaderRecordSize, shaderGroupDataPtrPerGroup,
3784 									autoAlignRecords);
3785 }
3786 
// Builds a host-visible shader binding table buffer from pre-queried group handles.
// Layout: [shaderBindingTableOffset bytes of padding][groupCount entries], each entry
// being a group handle optionally followed by shaderRecordSize bytes of user data.
// With autoAlignRecords the per-entry stride is rounded up to a multiple of
// shaderGroupHandleSize; otherwise entries are packed at handle+record size.
de::MovePtr<BufferWithMemory> RayTracingPipeline::createShaderBindingTable (const DeviceInterface&		vk,
																			const VkDevice				device,
																			Allocator&					allocator,
																			const deUint32				shaderGroupHandleSize,
																			const deUint32				shaderGroupBaseAlignment,
																			const std::vector<uint8_t>&	shaderHandles,
																			const VkBufferCreateFlags	additionalBufferCreateFlags,
																			const VkBufferUsageFlags	additionalBufferUsageFlags,
																			const MemoryRequirement&	additionalMemoryRequirement,
																			const VkDeviceAddress		opaqueCaptureAddress,
																			const deUint32				shaderBindingTableOffset,
																			const deUint32				shaderRecordSize,
																			const void**				shaderGroupDataPtrPerGroup,
																			const bool					autoAlignRecords)
{
	// The table offset must respect the device's base alignment requirement.
	DE_ASSERT(shaderGroupBaseAlignment != 0u);
	DE_ASSERT((shaderBindingTableOffset % shaderGroupBaseAlignment) == 0);
	DE_UNREF(shaderGroupBaseAlignment);

	// shaderHandles is a packed array: one handle of shaderGroupHandleSize bytes per group.
	const auto								groupCount						= de::sizeU32(shaderHandles) / shaderGroupHandleSize;
	const auto								totalEntrySize					= (autoAlignRecords ? (deAlign32(shaderGroupHandleSize + shaderRecordSize, shaderGroupHandleSize)) : (shaderGroupHandleSize + shaderRecordSize));
	const deUint32							sbtSize							= shaderBindingTableOffset + groupCount * totalEntrySize;
	const VkBufferUsageFlags				sbtFlags						= VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | additionalBufferUsageFlags;
	VkBufferCreateInfo						sbtCreateInfo					= makeBufferCreateInfo(sbtSize, sbtFlags);
	sbtCreateInfo.flags														|= additionalBufferCreateFlags;
	VkBufferUsageFlags2CreateInfoKHR		bufferUsageFlags2				= vk::initVulkanStructure();
	VkBufferOpaqueCaptureAddressCreateInfo	sbtCaptureAddressInfo			=
	{
		VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO,	// VkStructureType	sType;
		DE_NULL,														// const void*		pNext;
		deUint64(opaqueCaptureAddress)									// deUint64			opaqueCaptureAddress;
	};

	// when maintenance5 is tested then m_pipelineCreateFlags2 is non-zero
	if (m_pipelineCreateFlags2)
	{
		bufferUsageFlags2.usage = (VkBufferUsageFlags2KHR)sbtFlags;
		sbtCreateInfo.pNext = &bufferUsageFlags2;
		sbtCreateInfo.usage = 0;
	}

	// NOTE(review): this assignment replaces pNext wholesale, so if both
	// m_pipelineCreateFlags2 and opaqueCaptureAddress are set, bufferUsageFlags2
	// is dropped from the chain — presumably the two are never combined; verify.
	if (opaqueCaptureAddress != 0u)
	{
		sbtCreateInfo.pNext = &sbtCaptureAddressInfo;
		sbtCreateInfo.flags |= VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
	}
	// Host-visible + coherent so we can fill the table directly from the CPU;
	// DeviceAddress because the SBT is consumed via buffer device addresses.
	const MemoryRequirement			sbtMemRequirements						= MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress | additionalMemoryRequirement;
	de::MovePtr<BufferWithMemory>	sbtBuffer								= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, sbtCreateInfo, sbtMemRequirements));
	vk::Allocation&					sbtAlloc								= sbtBuffer->getAllocation();

	// Copy handles to table, leaving space for ShaderRecordKHR after each handle.
	deUint8* shaderBegin = (deUint8*)sbtAlloc.getHostPtr() + shaderBindingTableOffset;
	for (deUint32 idx = 0; idx < groupCount; ++idx)
	{
		const deUint8*	shaderSrcPos	= shaderHandles.data() + idx * shaderGroupHandleSize;
		deUint8*		shaderDstPos	= shaderBegin + idx * totalEntrySize;
		deMemcpy(shaderDstPos, shaderSrcPos, shaderGroupHandleSize);

		// Optional per-group shader record data immediately after the handle.
		if (shaderGroupDataPtrPerGroup		!= nullptr &&
			shaderGroupDataPtrPerGroup[idx] != nullptr)
		{
			DE_ASSERT(sbtSize >= static_cast<deUint32>(shaderDstPos - shaderBegin) + shaderGroupHandleSize);

			deMemcpy(	shaderDstPos + shaderGroupHandleSize,
						shaderGroupDataPtrPerGroup[idx],
						shaderRecordSize);
		}
	}

	// Memory is requested Coherent, but flush anyway so the table is visible to the device.
	flushMappedMemoryRange(vk, device, sbtAlloc.getMemory(), sbtAlloc.getOffset(), VK_WHOLE_SIZE);

	return sbtBuffer;
}
3860 
setCreateFlags(const VkPipelineCreateFlags & pipelineCreateFlags)3861 void RayTracingPipeline::setCreateFlags (const VkPipelineCreateFlags& pipelineCreateFlags)
3862 {
3863 	m_pipelineCreateFlags = pipelineCreateFlags;
3864 }
3865 
setCreateFlags2(const VkPipelineCreateFlags2KHR & pipelineCreateFlags2)3866 void RayTracingPipeline::setCreateFlags2 (const VkPipelineCreateFlags2KHR& pipelineCreateFlags2)
3867 {
3868 	m_pipelineCreateFlags2 = pipelineCreateFlags2;
3869 }
3870 
setMaxRecursionDepth(const deUint32 & maxRecursionDepth)3871 void RayTracingPipeline::setMaxRecursionDepth (const deUint32& maxRecursionDepth)
3872 {
3873 	m_maxRecursionDepth = maxRecursionDepth;
3874 }
3875 
setMaxPayloadSize(const deUint32 & maxPayloadSize)3876 void RayTracingPipeline::setMaxPayloadSize (const deUint32& maxPayloadSize)
3877 {
3878 	m_maxPayloadSize = maxPayloadSize;
3879 }
3880 
setMaxAttributeSize(const deUint32 & maxAttributeSize)3881 void RayTracingPipeline::setMaxAttributeSize (const deUint32& maxAttributeSize)
3882 {
3883 	m_maxAttributeSize = maxAttributeSize;
3884 }
3885 
setDeferredOperation(const bool deferredOperation,const deUint32 workerThreadCount)3886 void RayTracingPipeline::setDeferredOperation (const bool		deferredOperation,
3887 											   const deUint32	workerThreadCount)
3888 {
3889 	m_deferredOperation = deferredOperation;
3890 	m_workerThreadCount = workerThreadCount;
3891 }
3892 
addDynamicState(const VkDynamicState & dynamicState)3893 void RayTracingPipeline::addDynamicState(const VkDynamicState& dynamicState)
3894 {
3895 	m_dynamicStates.push_back(dynamicState);
3896 }
3897 
// Concrete RayTracingProperties implementation backed by the KHR extension
// property structures queried from the physical device.
class RayTracingPropertiesKHR : public RayTracingProperties
{
public:
							RayTracingPropertiesKHR						() = delete;
							RayTracingPropertiesKHR						(const InstanceInterface&	vki,
																		 const VkPhysicalDevice		physicalDevice);
	virtual					~RayTracingPropertiesKHR					();

	// Accessors forwarding to the cached VkPhysicalDeviceRayTracingPipelinePropertiesKHR,
	// VkPhysicalDeviceAccelerationStructurePropertiesKHR and core device limits.
	uint32_t		getShaderGroupHandleSize					(void)	override { return m_rayTracingPipelineProperties.shaderGroupHandleSize;						}
	uint32_t		getShaderGroupHandleAlignment				(void)	override { return m_rayTracingPipelineProperties.shaderGroupHandleAlignment;				}
	deUint32		getShaderGroupHandleCaptureReplaySize		(void)	override { return m_rayTracingPipelineProperties.shaderGroupHandleCaptureReplaySize;		}
	uint32_t		getMaxRecursionDepth						(void)	override { return m_rayTracingPipelineProperties.maxRayRecursionDepth;						}
	uint32_t		getMaxShaderGroupStride						(void)	override { return m_rayTracingPipelineProperties.maxShaderGroupStride;						}
	uint32_t		getShaderGroupBaseAlignment					(void)	override { return m_rayTracingPipelineProperties.shaderGroupBaseAlignment;					}
	uint64_t		getMaxGeometryCount							(void)	override { return m_accelerationStructureProperties.maxGeometryCount;						}
	uint64_t		getMaxInstanceCount							(void)	override { return m_accelerationStructureProperties.maxInstanceCount;						}
	uint64_t		getMaxPrimitiveCount						(void)	override { return m_accelerationStructureProperties.maxPrimitiveCount;						}
	uint32_t		getMaxDescriptorSetAccelerationStructures	(void)	override { return m_accelerationStructureProperties.maxDescriptorSetAccelerationStructures;	}
	uint32_t		getMaxRayDispatchInvocationCount			(void)	override { return m_rayTracingPipelineProperties.maxRayDispatchInvocationCount;				}
	uint32_t		getMaxRayHitAttributeSize					(void)	override { return m_rayTracingPipelineProperties.maxRayHitAttributeSize;					}
	uint32_t		getMaxMemoryAllocationCount					(void)	override { return m_maxMemoryAllocationCount;												}

protected:
	// Populated once in the constructor; treated as immutable afterwards.
	VkPhysicalDeviceAccelerationStructurePropertiesKHR	m_accelerationStructureProperties;
	VkPhysicalDeviceRayTracingPipelinePropertiesKHR		m_rayTracingPipelineProperties;
	deUint32											m_maxMemoryAllocationCount;
};
3925 
~RayTracingPropertiesKHR()3926 RayTracingPropertiesKHR::~RayTracingPropertiesKHR ()
3927 {
3928 }
3929 
// Queries and caches the acceleration-structure and ray-tracing-pipeline extension
// properties plus the core maxMemoryAllocationCount limit for the given device.
RayTracingPropertiesKHR::RayTracingPropertiesKHR (const InstanceInterface&	vki,
												  const VkPhysicalDevice	physicalDevice)
	: RayTracingProperties	(vki, physicalDevice)
{
	// getPhysicalDeviceExtensionProperties returns a proxy that converts to the
	// requested property structure type on assignment.
	m_accelerationStructureProperties	= getPhysicalDeviceExtensionProperties(vki, physicalDevice);
	m_rayTracingPipelineProperties		= getPhysicalDeviceExtensionProperties(vki, physicalDevice);
	m_maxMemoryAllocationCount			= getPhysicalDeviceProperties(vki, physicalDevice).limits.maxMemoryAllocationCount;
}
3938 
makeRayTracingProperties(const InstanceInterface & vki,const VkPhysicalDevice physicalDevice)3939 de::MovePtr<RayTracingProperties> makeRayTracingProperties (const InstanceInterface&	vki,
3940 															const VkPhysicalDevice		physicalDevice)
3941 {
3942 	return de::MovePtr<RayTracingProperties>(new RayTracingPropertiesKHR(vki, physicalDevice));
3943 }
3944 
cmdTraceRaysKHR(const DeviceInterface & vk,VkCommandBuffer commandBuffer,const VkStridedDeviceAddressRegionKHR * raygenShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * missShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * hitShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * callableShaderBindingTableRegion,deUint32 width,deUint32 height,deUint32 depth)3945 static inline void cmdTraceRaysKHR (const DeviceInterface&					vk,
3946 									VkCommandBuffer							commandBuffer,
3947 									const VkStridedDeviceAddressRegionKHR*	raygenShaderBindingTableRegion,
3948 									const VkStridedDeviceAddressRegionKHR*	missShaderBindingTableRegion,
3949 									const VkStridedDeviceAddressRegionKHR*	hitShaderBindingTableRegion,
3950 									const VkStridedDeviceAddressRegionKHR*	callableShaderBindingTableRegion,
3951 									deUint32								width,
3952 									deUint32								height,
3953 									deUint32								depth)
3954 {
3955 	return vk.cmdTraceRaysKHR(commandBuffer,
3956 							  raygenShaderBindingTableRegion,
3957 							  missShaderBindingTableRegion,
3958 							  hitShaderBindingTableRegion,
3959 							  callableShaderBindingTableRegion,
3960 							  width,
3961 							  height,
3962 							  depth);
3963 }
3964 
3965 
cmdTraceRays(const DeviceInterface & vk,VkCommandBuffer commandBuffer,const VkStridedDeviceAddressRegionKHR * raygenShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * missShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * hitShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * callableShaderBindingTableRegion,deUint32 width,deUint32 height,deUint32 depth)3966 void cmdTraceRays (const DeviceInterface&					vk,
3967 				   VkCommandBuffer							commandBuffer,
3968 				   const VkStridedDeviceAddressRegionKHR*	raygenShaderBindingTableRegion,
3969 				   const VkStridedDeviceAddressRegionKHR*	missShaderBindingTableRegion,
3970 				   const VkStridedDeviceAddressRegionKHR*	hitShaderBindingTableRegion,
3971 				   const VkStridedDeviceAddressRegionKHR*	callableShaderBindingTableRegion,
3972 				   deUint32									width,
3973 				   deUint32									height,
3974 				   deUint32									depth)
3975 {
3976 	DE_ASSERT(raygenShaderBindingTableRegion	!= DE_NULL);
3977 	DE_ASSERT(missShaderBindingTableRegion		!= DE_NULL);
3978 	DE_ASSERT(hitShaderBindingTableRegion		!= DE_NULL);
3979 	DE_ASSERT(callableShaderBindingTableRegion	!= DE_NULL);
3980 
3981 	return cmdTraceRaysKHR(vk,
3982 						   commandBuffer,
3983 						   raygenShaderBindingTableRegion,
3984 						   missShaderBindingTableRegion,
3985 						   hitShaderBindingTableRegion,
3986 						   callableShaderBindingTableRegion,
3987 						   width,
3988 						   height,
3989 						   depth);
3990 }
3991 
cmdTraceRaysIndirectKHR(const DeviceInterface & vk,VkCommandBuffer commandBuffer,const VkStridedDeviceAddressRegionKHR * raygenShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * missShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * hitShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * callableShaderBindingTableRegion,VkDeviceAddress indirectDeviceAddress)3992 static inline void cmdTraceRaysIndirectKHR (const DeviceInterface&					vk,
3993 											VkCommandBuffer							commandBuffer,
3994 											const VkStridedDeviceAddressRegionKHR*	raygenShaderBindingTableRegion,
3995 											const VkStridedDeviceAddressRegionKHR*	missShaderBindingTableRegion,
3996 											const VkStridedDeviceAddressRegionKHR*	hitShaderBindingTableRegion,
3997 											const VkStridedDeviceAddressRegionKHR*	callableShaderBindingTableRegion,
3998 											VkDeviceAddress							indirectDeviceAddress )
3999 {
4000 	DE_ASSERT(raygenShaderBindingTableRegion	!= DE_NULL);
4001 	DE_ASSERT(missShaderBindingTableRegion		!= DE_NULL);
4002 	DE_ASSERT(hitShaderBindingTableRegion		!= DE_NULL);
4003 	DE_ASSERT(callableShaderBindingTableRegion	!= DE_NULL);
4004 	DE_ASSERT(indirectDeviceAddress				!= 0);
4005 
4006 	return vk.cmdTraceRaysIndirectKHR(commandBuffer,
4007 									  raygenShaderBindingTableRegion,
4008 									  missShaderBindingTableRegion,
4009 									  hitShaderBindingTableRegion,
4010 									  callableShaderBindingTableRegion,
4011 									  indirectDeviceAddress);
4012 }
4013 
cmdTraceRaysIndirect(const DeviceInterface & vk,VkCommandBuffer commandBuffer,const VkStridedDeviceAddressRegionKHR * raygenShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * missShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * hitShaderBindingTableRegion,const VkStridedDeviceAddressRegionKHR * callableShaderBindingTableRegion,VkDeviceAddress indirectDeviceAddress)4014 void cmdTraceRaysIndirect (const DeviceInterface&					vk,
4015 						   VkCommandBuffer							commandBuffer,
4016 						   const VkStridedDeviceAddressRegionKHR*	raygenShaderBindingTableRegion,
4017 						   const VkStridedDeviceAddressRegionKHR*	missShaderBindingTableRegion,
4018 						   const VkStridedDeviceAddressRegionKHR*	hitShaderBindingTableRegion,
4019 						   const VkStridedDeviceAddressRegionKHR*	callableShaderBindingTableRegion,
4020 						   VkDeviceAddress							indirectDeviceAddress)
4021 {
4022 	return cmdTraceRaysIndirectKHR(vk,
4023 								   commandBuffer,
4024 								   raygenShaderBindingTableRegion,
4025 								   missShaderBindingTableRegion,
4026 								   hitShaderBindingTableRegion,
4027 								   callableShaderBindingTableRegion,
4028 								   indirectDeviceAddress);
4029 }
4030 
cmdTraceRaysIndirect2KHR(const DeviceInterface & vk,VkCommandBuffer commandBuffer,VkDeviceAddress indirectDeviceAddress)4031 static inline void cmdTraceRaysIndirect2KHR (const DeviceInterface&	vk,
4032 											VkCommandBuffer			commandBuffer,
4033 											VkDeviceAddress			indirectDeviceAddress )
4034 {
4035 	DE_ASSERT(indirectDeviceAddress != 0);
4036 
4037 	return vk.cmdTraceRaysIndirect2KHR(commandBuffer, indirectDeviceAddress);
4038 }
4039 
cmdTraceRaysIndirect2(const DeviceInterface & vk,VkCommandBuffer commandBuffer,VkDeviceAddress indirectDeviceAddress)4040 void cmdTraceRaysIndirect2	(const DeviceInterface&	vk,
4041 							 VkCommandBuffer		commandBuffer,
4042 							 VkDeviceAddress		indirectDeviceAddress)
4043 {
4044 	return cmdTraceRaysIndirect2KHR(vk, commandBuffer, indirectDeviceAddress);
4045 }
4046 
4047 #else
4048 
4049 deUint32 rayTracingDefineAnything()
4050 {
4051 	return 0;
4052 }
4053 
4054 #endif // CTS_USES_VULKANSC
4055 
4056 } // vk
4057