/*-------------------------------------------------------------------------
 * Vulkan CTS Framework
 * --------------------
 *
 * Copyright (c) 2020 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Vulkan ray tracing utilities
 *//*--------------------------------------------------------------------*/

#include "vkRayTracingUtil.hpp"

#include "vkRefUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkCmdUtil.hpp"

#include "deStringUtil.hpp"
#include "deSTLUtil.hpp"

#include <algorithm>
#include <vector>
#include <string>
#include <thread>
#include <limits>
#include <type_traits>
#include <map>

namespace vk
{

#ifndef CTS_USES_VULKANSC

static const deUint32 WATCHDOG_INTERVAL = 16384; // Touch watchDog every N iterations.

struct DeferredThreadParams
{
    const DeviceInterface&  vk;
    VkDevice                device;
    VkDeferredOperationKHR  deferredOperation;
    VkResult                result;
};

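// Strips the "VK_FORMAT_" prefix and lower-cases the remainder, e.g. VK_FORMAT_R32G32_SFLOAT -> "r32g32_sfloat".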
std::string getFormatSimpleName (vk::VkFormat format)
{
    constexpr size_t kPrefixLen = 10; // strlen("VK_FORMAT_")
    return de::toLower(de::toString(format).substr(kPrefixLen));
}

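// Returns true if point p lies inside triangle (p0, p1, p2), using signed areas computed from the x/y components only.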
bool pointInTriangle2D (const tcu::Vec3& p, const tcu::Vec3& p0, const tcu::Vec3& p1, const tcu::Vec3& p2)
{
    float s = p0.y() * p2.x() - p0.x() * p2.y() + (p2.y() - p0.y()) * p.x() + (p0.x() - p2.x()) * p.y();
    float t = p0.x() * p1.y() - p0.y() * p1.x() + (p0.y() - p1.y()) * p.x() + (p1.x() - p0.x()) * p.y();

    if ((s < 0) != (t < 0))
        return false;

    float a = -p1.y() * p2.x() + p0.y() * (p2.x() - p1.x()) + p0.x() * (p1.y() - p2.y()) + p1.x() * p2.y();

    return a < 0 ?
        (s <= 0 && s + t >= a) :
        (s >= 0 && s + t <= a);
}

// Returns true if VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR needs to be supported for the given format.
static bool isMandatoryAccelerationStructureVertexBufferFormat (vk::VkFormat format)
{
    bool mandatory = false;

    switch (format)
    {
        case VK_FORMAT_R32G32_SFLOAT:
        case VK_FORMAT_R32G32B32_SFLOAT:
        case VK_FORMAT_R16G16_SFLOAT:
        case VK_FORMAT_R16G16B16A16_SFLOAT:
        case VK_FORMAT_R16G16_SNORM:
        case VK_FORMAT_R16G16B16A16_SNORM:
            mandatory = true;
            break;
        default:
            break;
    }

    return mandatory;
}

void checkAccelerationStructureVertexBufferFormat (const vk::InstanceInterface& vki, vk::VkPhysicalDevice physicalDevice, vk::VkFormat format)
{
    const vk::VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(vki, physicalDevice, format);

    if ((formatProperties.bufferFeatures & vk::VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR) == 0u)
    {
        const std::string errorMsg = "Format not supported for acceleration structure vertex buffers";
        if (isMandatoryAccelerationStructureVertexBufferFormat(format))
            TCU_FAIL(errorMsg);
        TCU_THROW(NotSupportedError, errorMsg);
    }
}

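// GLSL source for a ray generation shader shared by many tests: it shoots one ray per launch ID through
// the centre of the corresponding pixel at z = 0, towards -Z, with tmax = 9.0.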
std::string getCommonRayGenerationShader (void)
{
    return
        "#version 460 core\n"
        "#extension GL_EXT_ray_tracing : require\n"
        "layout(location = 0) rayPayloadEXT vec3 hitValue;\n"
        "layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n"
        "\n"
        "void main()\n"
        "{\n"
        "  uint  rayFlags = 0;\n"
        "  uint  cullMask = 0xFF;\n"
        "  float tmin     = 0.0;\n"
        "  float tmax     = 9.0;\n"
        "  vec3  origin   = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
        "  vec3  direct   = vec3(0.0, 0.0, -1.0);\n"
        "  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, 0);\n"
        "}\n";
}

RaytracedGeometryBase::RaytracedGeometryBase (VkGeometryTypeKHR geometryType, VkFormat vertexFormat, VkIndexType indexType)
    : m_geometryType        (geometryType)
    , m_vertexFormat        (vertexFormat)
    , m_indexType           (indexType)
    , m_geometryFlags       ((VkGeometryFlagsKHR)0u)
    , m_hasOpacityMicromap  (false)
{
    if (m_geometryType == VK_GEOMETRY_TYPE_AABBS_KHR)
        DE_ASSERT(m_vertexFormat == VK_FORMAT_R32G32B32_SFLOAT);
}

RaytracedGeometryBase::~RaytracedGeometryBase ()
{
}

struct GeometryBuilderParams
{
    VkGeometryTypeKHR   geometryType;
    bool                usePadding;
};

template <typename V, typename I>
RaytracedGeometryBase* buildRaytracedGeometry (const GeometryBuilderParams& params)
{
    return new RaytracedGeometry<V, I>(params.geometryType, (params.usePadding ? 1u : 0u));
}

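// Instantiates the RaytracedGeometry template specialization matching the requested vertex format and index type.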
de::SharedPtr<RaytracedGeometryBase> makeRaytracedGeometry (VkGeometryTypeKHR geometryType, VkFormat vertexFormat, VkIndexType indexType, bool padVertices)
{
    const GeometryBuilderParams builderParams { geometryType, padVertices };

    switch (vertexFormat)
    {
        case VK_FORMAT_R32G32_SFLOAT:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, EmptyIndex>(builderParams));
                default:                        TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R32G32B32_SFLOAT:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, EmptyIndex>(builderParams));
                default:                        TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R32G32B32A32_SFLOAT:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, EmptyIndex>(builderParams));
                default:                        TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R16G16_SFLOAT:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, EmptyIndex>(builderParams));
                default:                        TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R16G16B16_SFLOAT:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, EmptyIndex>(builderParams));
                default:                        TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R16G16B16A16_SFLOAT:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, EmptyIndex>(builderParams));
                default:                        TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R16G16_SNORM:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, EmptyIndex>(builderParams));
                default:                        TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R16G16B16_SNORM:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, EmptyIndex>(builderParams));
                default:                        TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R16G16B16A16_SNORM:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, EmptyIndex>(builderParams));
                default:                        TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R64G64_SFLOAT:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, EmptyIndex>(builderParams));
                default:                        TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R64G64B64_SFLOAT:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, EmptyIndex>(builderParams));
                default:                        TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R64G64B64A64_SFLOAT:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, EmptyIndex>(builderParams));
                default:                        TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R8G8_SNORM:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, EmptyIndex>(builderParams));
                default:                        TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R8G8B8_SNORM:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, EmptyIndex>(builderParams));
                default:                        TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R8G8B8A8_SNORM:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:      return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, EmptyIndex>(builderParams));
                default:                        TCU_THROW(InternalError, "Wrong index type");
            }
        default:
            TCU_THROW(InternalError, "Wrong vertex format");
    }
}

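// Returns the device address of the given buffer plus offset, or 0 for a null buffer handle.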
VkDeviceAddress getBufferDeviceAddress (const DeviceInterface&  vk,
                                        const VkDevice          device,
                                        const VkBuffer          buffer,
                                        VkDeviceSize            offset)
{
    if (buffer == DE_NULL)
        return 0;

    VkBufferDeviceAddressInfo deviceAddressInfo
    {
        VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,   // VkStructureType  sType;
        DE_NULL,                                        // const void*      pNext;
        buffer                                          // VkBuffer         buffer;
    };
    return vk.getBufferDeviceAddress(device, &deviceAddressInfo) + offset;
}

static inline Move<VkQueryPool> makeQueryPool (const DeviceInterface&   vk,
                                               const VkDevice           device,
                                               const VkQueryType        queryType,
                                               deUint32                 queryCount)
{
    const VkQueryPoolCreateInfo queryPoolCreateInfo =
    {
        VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,   // sType
        DE_NULL,                                    // pNext
        (VkQueryPoolCreateFlags)0,                  // flags
        queryType,                                  // queryType
        queryCount,                                 // queryCount
        0u,                                         // pipelineStatistics
    };
    return createQueryPool(vk, device, &queryPoolCreateInfo);
}

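// The helpers below zero-initialize the VkAccelerationStructureGeometryDataKHR union before setting its active member.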
static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureGeometryDataKHR (const VkAccelerationStructureGeometryTrianglesDataKHR& triangles)
{
    VkAccelerationStructureGeometryDataKHR result;

    deMemset(&result, 0, sizeof(result));

    result.triangles = triangles;

    return result;
}

static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureGeometryDataKHR (const VkAccelerationStructureGeometryAabbsDataKHR& aabbs)
{
    VkAccelerationStructureGeometryDataKHR result;

    deMemset(&result, 0, sizeof(result));

    result.aabbs = aabbs;

    return result;
}

static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureInstancesDataKHR (const VkAccelerationStructureGeometryInstancesDataKHR& instances)
{
    VkAccelerationStructureGeometryDataKHR result;

    deMemset(&result, 0, sizeof(result));

    result.instances = instances;

    return result;
}

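// Packs the bitfield members by hand: instanceCustomIndex and the SBT record offset are 24 bits wide, mask and flags 8 bits.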
static inline VkAccelerationStructureInstanceKHR makeVkAccelerationStructureInstanceKHR (const VkTransformMatrixKHR&    transform,
                                                                                         deUint32                       instanceCustomIndex,
                                                                                         deUint32                       mask,
                                                                                         deUint32                       instanceShaderBindingTableRecordOffset,
                                                                                         VkGeometryInstanceFlagsKHR     flags,
                                                                                         deUint64                       accelerationStructureReference)
{
    VkAccelerationStructureInstanceKHR instance     = { transform, 0, 0, 0, 0, accelerationStructureReference };
    instance.instanceCustomIndex                    = instanceCustomIndex & 0xFFFFFF;
    instance.mask                                   = mask & 0xFF;
    instance.instanceShaderBindingTableRecordOffset = instanceShaderBindingTableRecordOffset & 0xFFFFFF;
    instance.flags                                  = flags & 0xFF;
    return instance;
}

VkResult getRayTracingShaderGroupHandlesKHR (const DeviceInterface& vk,
                                             const VkDevice         device,
                                             const VkPipeline       pipeline,
                                             const deUint32         firstGroup,
                                             const deUint32         groupCount,
                                             const deUintptr        dataSize,
                                             void*                  pData)
{
    return vk.getRayTracingShaderGroupHandlesKHR(device, pipeline, firstGroup, groupCount, dataSize, pData);
}

VkResult getRayTracingShaderGroupHandles (const DeviceInterface&    vk,
                                          const VkDevice            device,
                                          const VkPipeline          pipeline,
                                          const deUint32            firstGroup,
                                          const deUint32            groupCount,
                                          const deUintptr           dataSize,
                                          void*                     pData)
{
    return getRayTracingShaderGroupHandlesKHR(vk, device, pipeline, firstGroup, groupCount, dataSize, pData);
}

VkResult getRayTracingCaptureReplayShaderGroupHandles (const DeviceInterface&   vk,
                                                       const VkDevice           device,
                                                       const VkPipeline         pipeline,
                                                       const deUint32           firstGroup,
                                                       const deUint32           groupCount,
                                                       const deUintptr          dataSize,
                                                       void*                    pData)
{
    return vk.getRayTracingCaptureReplayShaderGroupHandlesKHR(device, pipeline, firstGroup, groupCount, dataSize, pData);
}

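// Joins the calling thread to the deferred operation, yielding while the driver reports VK_THREAD_IDLE_KHR,
// and returns the operation's final result.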
VkResult finishDeferredOperation (const DeviceInterface&    vk,
                                  VkDevice                  device,
                                  VkDeferredOperationKHR    deferredOperation)
{
    VkResult result = vk.deferredOperationJoinKHR(device, deferredOperation);

    while (result == VK_THREAD_IDLE_KHR)
    {
        std::this_thread::yield();
        result = vk.deferredOperationJoinKHR(device, deferredOperation);
    }

    switch (result)
    {
        case VK_SUCCESS:
        {
            // Deferred operation has finished. Query its result.
            result = vk.getDeferredOperationResultKHR(device, deferredOperation);

            break;
        }

        case VK_THREAD_DONE_KHR:
        {
            // Deferred operation is being wrapped up by another thread;
            // wait for that thread to finish.
            do
            {
                std::this_thread::yield();
                result = vk.getDeferredOperationResultKHR(device, deferredOperation);
            } while (result == VK_NOT_READY);

            break;
        }

        default:
        {
            DE_ASSERT(false);

            break;
        }
    }

    return result;
}

void finishDeferredOperationThreaded (DeferredThreadParams* deferredThreadParams)
{
    deferredThreadParams->result = finishDeferredOperation(deferredThreadParams->vk, deferredThreadParams->device, deferredThreadParams->deferredOperation);
}

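// Multi-threaded variant: joins workerThreadCount threads to the deferred operation (~0u requests the
// maximum concurrency reported by the driver) and fails unless at least one thread gets VK_SUCCESS.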
void finishDeferredOperation (const DeviceInterface&    vk,
                              VkDevice                  device,
                              VkDeferredOperationKHR    deferredOperation,
                              const deUint32            workerThreadCount,
                              const bool                operationNotDeferred)
{
    if (operationNotDeferred)
    {
        // When the operation deferral returns VK_OPERATION_NOT_DEFERRED_KHR,
        // the deferred operation should act as if no command was deferred.
        VK_CHECK(vk.getDeferredOperationResultKHR(device, deferredOperation));

        // There is no need to join any threads to the deferred operation,
        // so the code below can be skipped.
        return;
    }

    if (workerThreadCount == 0)
    {
        VK_CHECK(finishDeferredOperation(vk, device, deferredOperation));
    }
    else
    {
        const deUint32 maxThreadCountSupported  = deMinu32(256u, vk.getDeferredOperationMaxConcurrencyKHR(device, deferredOperation));
        const deUint32 requestedThreadCount     = workerThreadCount;
        const deUint32 testThreadCount          = requestedThreadCount == std::numeric_limits<deUint32>::max() ? maxThreadCountSupported : requestedThreadCount;

        if (maxThreadCountSupported == 0)
            TCU_FAIL("vkGetDeferredOperationMaxConcurrencyKHR must not return 0");

        const DeferredThreadParams deferredThreadParams =
        {
            vk,                 // const DeviceInterface&   vk;
            device,             // VkDevice                 device;
            deferredOperation,  // VkDeferredOperationKHR   deferredOperation;
            VK_RESULT_MAX_ENUM, // VkResult                 result;
        };
        std::vector<DeferredThreadParams>       threadParams    (testThreadCount, deferredThreadParams);
        std::vector<de::MovePtr<std::thread> >  threads         (testThreadCount);
        bool                                    executionResult = false;

        DE_ASSERT(threads.size() > 0 && threads.size() == testThreadCount);

        for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
            threads[threadNdx] = de::MovePtr<std::thread>(new std::thread(finishDeferredOperationThreaded, &threadParams[threadNdx]));

        for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
            threads[threadNdx]->join();

        for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
            if (threadParams[threadNdx].result == VK_SUCCESS)
                executionResult = true;

        if (!executionResult)
            TCU_FAIL("No thread reported VK_SUCCESS");
    }
}

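// Backing storage used when serializing and deserializing acceleration structures;
// prefers cached host-visible memory and falls back to uncached when that combination is unsupported.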
SerialStorage::SerialStorage (const DeviceInterface&                    vk,
                              const VkDevice                            device,
                              Allocator&                                allocator,
                              const VkAccelerationStructureBuildTypeKHR buildType,
                              const VkDeviceSize                        storageSize)
    : m_buildType   (buildType)
    , m_storageSize (storageSize)
    , m_serialInfo  ()
{
    const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(storageSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
    try
    {
        m_buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
    }
    catch (const tcu::NotSupportedError&)
    {
        // Retry without the Cached flag.
        m_buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
    }
}

SerialStorage::SerialStorage (const DeviceInterface&                    vk,
                              const VkDevice                            device,
                              Allocator&                                allocator,
                              const VkAccelerationStructureBuildTypeKHR buildType,
                              const SerialInfo&                         serialInfo)
    : m_buildType   (buildType)
    , m_storageSize (serialInfo.sizes()[0]) // asserts if serialInfo is empty
    , m_serialInfo  (serialInfo)
{
    DE_ASSERT(serialInfo.sizes().size() >= 2u);

    // Create buffer for the top-level acceleration structure.
    {
        const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(m_storageSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
        m_buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
    }

    // Create buffers for the bottom-level acceleration structures, one per unique address.
    {
        std::vector<deUint64> addrs;

        for (std::size_t i = 1; i < serialInfo.addresses().size(); ++i)
        {
            const deUint64& lookAddr    = serialInfo.addresses()[i];
            auto            end         = addrs.end();
            auto            match       = std::find_if(addrs.begin(), end, [&](const deUint64& item) { return item == lookAddr; });
            if (match == end)
            {
                addrs.emplace_back(lookAddr);
                m_bottoms.emplace_back(de::SharedPtr<SerialStorage>(new SerialStorage(vk, device, allocator, buildType, serialInfo.sizes()[i])));
            }
        }
    }
}

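// Returns a buffer device address for device builds and a host pointer for host builds.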
VkDeviceOrHostAddressKHR SerialStorage::getAddress (const DeviceInterface&                      vk,
                                                    const VkDevice                              device,
                                                    const VkAccelerationStructureBuildTypeKHR   buildType)
{
    if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
        return makeDeviceOrHostAddressKHR(vk, device, m_buffer->get(), 0);
    else
        return makeDeviceOrHostAddressKHR(m_buffer->getAllocation().getHostPtr());
}

SerialStorage::AccelerationStructureHeader* SerialStorage::getASHeader ()
{
    return reinterpret_cast<AccelerationStructureHeader*>(getHostAddress().hostAddress);
}

bool SerialStorage::hasDeepFormat () const
{
    return (m_serialInfo.sizes().size() >= 2u);
}

de::SharedPtr<SerialStorage> SerialStorage::getBottomStorage (deUint32 index) const
{
    return m_bottoms[index];
}

VkDeviceOrHostAddressKHR SerialStorage::getHostAddress (VkDeviceSize offset)
{
    DE_ASSERT(offset < m_storageSize);
    return makeDeviceOrHostAddressKHR(static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr()) + offset);
}

VkDeviceOrHostAddressConstKHR SerialStorage::getHostAddressConst (VkDeviceSize offset)
{
    return makeDeviceOrHostAddressConstKHR(static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr()) + offset);
}

VkDeviceOrHostAddressConstKHR SerialStorage::getAddressConst (const DeviceInterface&                    vk,
                                                              const VkDevice                            device,
                                                              const VkAccelerationStructureBuildTypeKHR buildType)
{
    if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
        return makeDeviceOrHostAddressConstKHR(vk, device, m_buffer->get(), 0);
    else
        return getHostAddressConst();
}

inline VkDeviceSize SerialStorage::getStorageSize () const
{
    return m_storageSize;
}

inline const SerialInfo& SerialStorage::getSerialInfo () const
{
    return m_serialInfo;
}

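// Reads the deserialized-size field from the header of the serialized acceleration structure.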
deUint64 SerialStorage::getDeserializedSize ()
{
    deUint64        result      = 0;
    const deUint8*  startPtr    = static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr());

    DE_ASSERT(sizeof(result) == DESERIALIZED_SIZE_SIZE);

    deMemcpy(&result, startPtr + DESERIALIZED_SIZE_OFFSET, sizeof(result));

    return result;
}

BottomLevelAccelerationStructure::~BottomLevelAccelerationStructure ()
{
}

BottomLevelAccelerationStructure::BottomLevelAccelerationStructure ()
    : m_structureSize       (0u)
    , m_updateScratchSize   (0u)
    , m_buildScratchSize    (0u)
{
}

void BottomLevelAccelerationStructure::setGeometryData (const std::vector<tcu::Vec3>&   geometryData,
                                                        const bool                      triangles,
                                                        const VkGeometryFlagsKHR        geometryFlags)
{
    if (triangles)
        DE_ASSERT((geometryData.size() % 3) == 0);
    else
        DE_ASSERT((geometryData.size() % 2) == 0);

    setGeometryCount(1u);

    addGeometry(geometryData, triangles, geometryFlags);
}

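// Builds a default single geometry for the given test stage: two triangles forming a quad for most stages,
// or a single AABB for the intersection stage. The geometry sits at z = -1 so rays from the common ray
// generation shader hit it, or at z = -9.9 (beyond tmax = 9.0) so they miss it for the miss stage.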
void BottomLevelAccelerationStructure::setDefaultGeometryData (const VkShaderStageFlagBits  testStage,
                                                               const VkGeometryFlagsKHR     geometryFlags)
{
    bool                    trianglesData   = false;
    float                   z               = 0.0f;
    std::vector<tcu::Vec3>  geometryData;

    switch (testStage)
    {
        case VK_SHADER_STAGE_RAYGEN_BIT_KHR:        z = -1.0f; trianglesData = true;    break;
        case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:       z = -1.0f; trianglesData = true;    break;
        case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:   z = -1.0f; trianglesData = true;    break;
        case VK_SHADER_STAGE_MISS_BIT_KHR:          z = -9.9f; trianglesData = true;    break;
        case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:  z = -1.0f; trianglesData = false;   break;
        case VK_SHADER_STAGE_CALLABLE_BIT_KHR:      z = -1.0f; trianglesData = true;    break;
        default:                                    TCU_THROW(InternalError, "Unacceptable stage");
    }

    if (trianglesData)
    {
        geometryData.reserve(6);

        geometryData.push_back(tcu::Vec3(-1.0f, -1.0f, z));
        geometryData.push_back(tcu::Vec3(-1.0f, +1.0f, z));
        geometryData.push_back(tcu::Vec3(+1.0f, -1.0f, z));
        geometryData.push_back(tcu::Vec3(+1.0f, -1.0f, z));
        geometryData.push_back(tcu::Vec3(-1.0f, +1.0f, z));
        geometryData.push_back(tcu::Vec3(+1.0f, +1.0f, z));
    }
    else
    {
        geometryData.reserve(2);

        geometryData.push_back(tcu::Vec3(-1.0f, -1.0f, z));
        geometryData.push_back(tcu::Vec3(+1.0f, +1.0f, z));
    }

    setGeometryCount(1u);

    addGeometry(geometryData, trianglesData, geometryFlags);
}

void BottomLevelAccelerationStructure::setGeometryCount (const size_t geometryCount)
{
    m_geometriesData.clear();

    m_geometriesData.reserve(geometryCount);
}

void BottomLevelAccelerationStructure::addGeometry (de::SharedPtr<RaytracedGeometryBase>& raytracedGeometry)
{
    m_geometriesData.push_back(raytracedGeometry);
}

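// Convenience overload: wraps raw vertex positions (triangles) or min/max corner pairs (AABBs)
// in a RaytracedGeometry with VK_FORMAT_R32G32B32_SFLOAT vertices and no index buffer.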
void BottomLevelAccelerationStructure::addGeometry (const std::vector<tcu::Vec3>&                              geometryData,
                                                    const bool                                                 triangles,
                                                    const VkGeometryFlagsKHR                                   geometryFlags,
                                                    const VkAccelerationStructureTrianglesOpacityMicromapEXT*  opacityGeometryMicromap)
{
    DE_ASSERT(geometryData.size() > 0);
    DE_ASSERT((triangles && geometryData.size() % 3 == 0) || (!triangles && geometryData.size() % 2 == 0));

    if (!triangles)
        for (size_t posNdx = 0; posNdx < geometryData.size() / 2; ++posNdx)
        {
            DE_ASSERT(geometryData[2 * posNdx].x() <= geometryData[2 * posNdx + 1].x());
            DE_ASSERT(geometryData[2 * posNdx].y() <= geometryData[2 * posNdx + 1].y());
            DE_ASSERT(geometryData[2 * posNdx].z() <= geometryData[2 * posNdx + 1].z());
        }

    de::SharedPtr<RaytracedGeometryBase> geometry = makeRaytracedGeometry(triangles ? VK_GEOMETRY_TYPE_TRIANGLES_KHR : VK_GEOMETRY_TYPE_AABBS_KHR, VK_FORMAT_R32G32B32_SFLOAT, VK_INDEX_TYPE_NONE_KHR);
    for (auto it = begin(geometryData), eit = end(geometryData); it != eit; ++it)
        geometry->addVertex(*it);

    geometry->setGeometryFlags(geometryFlags);
    if (opacityGeometryMicromap)
        geometry->setOpacityMicromap(opacityGeometryMicromap);
    addGeometry(geometry);
}

VkAccelerationStructureBuildSizesInfoKHR BottomLevelAccelerationStructure::getStructureBuildSizes () const
{
    return
    {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, // VkStructureType  sType;
        DE_NULL,                                                       // const void*      pNext;
        m_structureSize,                                               // VkDeviceSize     accelerationStructureSize;
        m_updateScratchSize,                                           // VkDeviceSize     updateScratchSize;
        m_buildScratchSize                                             // VkDeviceSize     buildScratchSize;
    };
}

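// Total bytes needed to store the vertices of all geometries, with each geometry's block aligned to 8 bytes.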
VkDeviceSize getVertexBufferSize (const std::vector<de::SharedPtr<RaytracedGeometryBase>>& geometriesData)
{
    DE_ASSERT(geometriesData.size() != 0);
    VkDeviceSize bufferSizeBytes = 0;
    for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
        bufferSizeBytes += deAlignSize(geometriesData[geometryNdx]->getVertexByteSize(), 8);
    return bufferSizeBytes;
}

BufferWithMemory* createVertexBuffer (const DeviceInterface&    vk,
                                      const VkDevice            device,
                                      Allocator&                allocator,
                                      const VkDeviceSize        bufferSizeBytes)
{
    const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
    return new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
}

BufferWithMemory* createVertexBuffer (const DeviceInterface&                                   vk,
                                      const VkDevice                                           device,
                                      Allocator&                                               allocator,
                                      const std::vector<de::SharedPtr<RaytracedGeometryBase>>& geometriesData)
{
    return createVertexBuffer(vk, device, allocator, getVertexBufferSize(geometriesData));
}

void updateVertexBuffer (const DeviceInterface&                                     vk,
                         const VkDevice                                             device,
                         const std::vector<de::SharedPtr<RaytracedGeometryBase>>&   geometriesData,
                         BufferWithMemory*                                          vertexBuffer,
                         VkDeviceSize                                               geometriesOffset = 0)
{
    const Allocation&   geometryAlloc   = vertexBuffer->getAllocation();
    deUint8*            bufferStart     = static_cast<deUint8*>(geometryAlloc.getHostPtr());
    VkDeviceSize        bufferOffset    = geometriesOffset;

    for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
    {
        const void*     geometryPtr     = geometriesData[geometryNdx]->getVertexPointer();
        const size_t    geometryPtrSize = geometriesData[geometryNdx]->getVertexByteSize();

        deMemcpy(&bufferStart[bufferOffset], geometryPtr, geometryPtrSize);

        bufferOffset += deAlignSize(geometryPtrSize, 8);
    }

    // Flush the whole allocation. We could flush only the interesting range, but we'd need to be sure both the offset and size
    // align to VkPhysicalDeviceLimits::nonCoherentAtomSize, which we are not considering. Also note most code uses Coherent memory
    // for the vertex and index buffers, so flushing is actually not needed.
    flushAlloc(vk, device, geometryAlloc);
}

VkDeviceSize getIndexBufferSize (const std::vector<de::SharedPtr<RaytracedGeometryBase>>& geometriesData)
{
    DE_ASSERT(!geometriesData.empty());

    VkDeviceSize bufferSizeBytes = 0;
    for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
        if (geometriesData[geometryNdx]->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
            bufferSizeBytes += deAlignSize(geometriesData[geometryNdx]->getIndexByteSize(), 8);
    return bufferSizeBytes;
}

BufferWithMemory* createIndexBuffer (const DeviceInterface& vk,
                                     const VkDevice         device,
                                     Allocator&             allocator,
                                     const VkDeviceSize     bufferSizeBytes)
{
    DE_ASSERT(bufferSizeBytes);
    const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
    return new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
}

BufferWithMemory* createIndexBuffer (const DeviceInterface&                                     vk,
                                     const VkDevice                                             device,
                                     Allocator&                                                 allocator,
                                     const std::vector<de::SharedPtr<RaytracedGeometryBase>>&   geometriesData)
{
    const VkDeviceSize bufferSizeBytes = getIndexBufferSize(geometriesData);
    return bufferSizeBytes ? createIndexBuffer(vk, device, allocator, bufferSizeBytes) : nullptr;
}

void updateIndexBuffer (const DeviceInterface&                                      vk,
                        const VkDevice                                              device,
                        const std::vector<de::SharedPtr<RaytracedGeometryBase>>&    geometriesData,
                        BufferWithMemory*                                           indexBuffer,
                        VkDeviceSize                                                geometriesOffset)
{
    const Allocation&   indexAlloc      = indexBuffer->getAllocation();
    deUint8*            bufferStart     = static_cast<deUint8*>(indexAlloc.getHostPtr());
    VkDeviceSize        bufferOffset    = geometriesOffset;

    for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
    {
        if (geometriesData[geometryNdx]->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
        {
            const void*     indexPtr        = geometriesData[geometryNdx]->getIndexPointer();
            const size_t    indexPtrSize    = geometriesData[geometryNdx]->getIndexByteSize();

            deMemcpy(&bufferStart[bufferOffset], indexPtr, indexPtrSize);

            bufferOffset += deAlignSize(indexPtrSize, 8);
        }
    }

    // Flush the whole allocation. We could flush only the interesting range, but we'd need to be sure both the offset and size
    // align to VkPhysicalDeviceLimits::nonCoherentAtomSize, which we are not considering. Also note most code uses Coherent memory
    // for the vertex and index buffers, so flushing is actually not needed.
    flushAlloc(vk, device, indexAlloc);
}

class BottomLevelAccelerationStructureKHR : public BottomLevelAccelerationStructure
{
public:
    static deUint32 getRequiredAllocationCount (void);

    BottomLevelAccelerationStructureKHR ();
    BottomLevelAccelerationStructureKHR (const BottomLevelAccelerationStructureKHR& other) = delete;
    virtual ~BottomLevelAccelerationStructureKHR ();

    void setBuildType (const VkAccelerationStructureBuildTypeKHR buildType) override;
    VkAccelerationStructureBuildTypeKHR getBuildType () const override;
    void setCreateFlags (const VkAccelerationStructureCreateFlagsKHR createFlags) override;
    void setCreateGeneric (bool createGeneric) override;
    void setCreationBufferUnbounded (bool creationBufferUnbounded) override;
    void setBuildFlags (const VkBuildAccelerationStructureFlagsKHR buildFlags) override;
    void setBuildWithoutGeometries (bool buildWithoutGeometries) override;
    void setBuildWithoutPrimitives (bool buildWithoutPrimitives) override;
    void setDeferredOperation (const bool       deferredOperation,
                               const deUint32   workerThreadCount) override;
    void setUseArrayOfPointers (const bool useArrayOfPointers) override;
    void setUseMaintenance5 (const bool useMaintenance5) override;
    void setIndirectBuildParameters (const VkBuffer     indirectBuffer,
                                     const VkDeviceSize indirectBufferOffset,
                                     const deUint32     indirectBufferStride) override;
    VkBuildAccelerationStructureFlagsKHR getBuildFlags () const override;

    void create (const DeviceInterface&     vk,
                 const VkDevice             device,
                 Allocator&                 allocator,
                 VkDeviceSize               structureSize,
                 VkDeviceAddress            deviceAddress = 0u,
                 const void*                pNext = DE_NULL,
                 const MemoryRequirement&   addMemoryRequirement = MemoryRequirement::Any,
                 const VkBuffer             creationBuffer = VK_NULL_HANDLE,
                 const VkDeviceSize         creationBufferSize = 0u) override;
    void build (const DeviceInterface&              vk,
                const VkDevice                      device,
                const VkCommandBuffer               cmdBuffer,
                BottomLevelAccelerationStructure*   srcAccelerationStructure = DE_NULL) override;
    void copyFrom (const DeviceInterface&               vk,
                   const VkDevice                       device,
                   const VkCommandBuffer                cmdBuffer,
                   BottomLevelAccelerationStructure*    accelerationStructure,
                   bool                                 compactCopy) override;

    void serialize (const DeviceInterface&  vk,
                    const VkDevice          device,
                    const VkCommandBuffer   cmdBuffer,
                    SerialStorage*          storage) override;
    void deserialize (const DeviceInterface&    vk,
                      const VkDevice            device,
                      const VkCommandBuffer     cmdBuffer,
                      SerialStorage*            storage) override;

    const VkAccelerationStructureKHR* getPtr (void) const override;
    void updateGeometry (size_t                                 geometryIndex,
                         de::SharedPtr<RaytracedGeometryBase>&  raytracedGeometry) override;

protected:
    VkAccelerationStructureBuildTypeKHR     m_buildType;
    VkAccelerationStructureCreateFlagsKHR   m_createFlags;
    bool                                    m_createGeneric;
    bool                                    m_creationBufferUnbounded;
    VkBuildAccelerationStructureFlagsKHR    m_buildFlags;
    bool                                    m_buildWithoutGeometries;
    bool                                    m_buildWithoutPrimitives;
    bool                                    m_deferredOperation;
    deUint32                                m_workerThreadCount;
    bool                                    m_useArrayOfPointers;
    bool                                    m_useMaintenance5;
    de::MovePtr<BufferWithMemory>           m_accelerationStructureBuffer;
    de::MovePtr<BufferWithMemory>           m_vertexBuffer;
    de::MovePtr<BufferWithMemory>           m_indexBuffer;
    de::MovePtr<BufferWithMemory>           m_deviceScratchBuffer;
    de::UniquePtr<std::vector<deUint8>>     m_hostScratchBuffer;
    Move<VkAccelerationStructureKHR>        m_accelerationStructureKHR;
    VkBuffer                                m_indirectBuffer;
    VkDeviceSize                            m_indirectBufferOffset;
    deUint32                                m_indirectBufferStride;

    void prepareGeometries (const DeviceInterface&                                              vk,
                            const VkDevice                                                      device,
                            std::vector<VkAccelerationStructureGeometryKHR>&                    accelerationStructureGeometriesKHR,
                            std::vector<VkAccelerationStructureGeometryKHR*>&                   accelerationStructureGeometriesKHRPointers,
                            std::vector<VkAccelerationStructureBuildRangeInfoKHR>&              accelerationStructureBuildRangeInfoKHR,
                            std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT>&    accelerationStructureGeometryMicromapsEXT,
                            std::vector<deUint32>&                                              maxPrimitiveCounts,
                            VkDeviceSize                                                        vertexBufferOffset = 0,
                            VkDeviceSize                                                        indexBufferOffset = 0) const;

    virtual BufferWithMemory*       getAccelerationStructureBuffer  () const { return m_accelerationStructureBuffer.get(); }
    virtual BufferWithMemory*       getDeviceScratchBuffer          () const { return m_deviceScratchBuffer.get(); }
    virtual std::vector<deUint8>*   getHostScratchBuffer            () const { return m_hostScratchBuffer.get(); }
    virtual BufferWithMemory*       getVertexBuffer                 () const { return m_vertexBuffer.get(); }
    virtual BufferWithMemory*       getIndexBuffer                  () const { return m_indexBuffer.get(); }

    virtual VkDeviceSize            getAccelerationStructureBufferOffset () const { return 0; }
    virtual VkDeviceSize            getDeviceScratchBufferOffset         () const { return 0; }
    virtual VkDeviceSize            getVertexBufferOffset                () const { return 0; }
    virtual VkDeviceSize            getIndexBufferOffset                 () const { return 0; }
};

deUint32 BottomLevelAccelerationStructureKHR::getRequiredAllocationCount (void)
{
    /*
        de::MovePtr<BufferWithMemory>   m_geometryBuffer; // but only when m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR
        de::MovePtr<Allocation>         m_accelerationStructureAlloc;
        de::MovePtr<BufferWithMemory>   m_deviceScratchBuffer;
    */
    return 3u;
}

BottomLevelAccelerationStructureKHR::~BottomLevelAccelerationStructureKHR ()
{
}

BottomLevelAccelerationStructureKHR::BottomLevelAccelerationStructureKHR ()
    : BottomLevelAccelerationStructure  ()
    , m_buildType                       (VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    , m_createFlags                     (0u)
    , m_createGeneric                   (false)
    , m_creationBufferUnbounded         (false)
    , m_buildFlags                      (0u)
    , m_buildWithoutGeometries          (false)
    , m_buildWithoutPrimitives          (false)
    , m_deferredOperation               (false)
    , m_workerThreadCount               (0)
    , m_useArrayOfPointers              (false)
    , m_useMaintenance5                 (false)
    , m_accelerationStructureBuffer     (DE_NULL)
    , m_vertexBuffer                    (DE_NULL)
    , m_indexBuffer                     (DE_NULL)
    , m_deviceScratchBuffer             (DE_NULL)
    , m_hostScratchBuffer               (new std::vector<deUint8>)
    , m_accelerationStructureKHR        ()
    , m_indirectBuffer                  (DE_NULL)
    , m_indirectBufferOffset            (0)
    , m_indirectBufferStride            (0)
{
}

void BottomLevelAccelerationStructureKHR::setBuildType (const VkAccelerationStructureBuildTypeKHR buildType)
{
    m_buildType = buildType;
}

VkAccelerationStructureBuildTypeKHR BottomLevelAccelerationStructureKHR::getBuildType () const
{
    return m_buildType;
}

void BottomLevelAccelerationStructureKHR::setCreateFlags (const VkAccelerationStructureCreateFlagsKHR createFlags)
{
    m_createFlags = createFlags;
}

void BottomLevelAccelerationStructureKHR::setCreateGeneric (bool createGeneric)
{
    m_createGeneric = createGeneric;
}

void BottomLevelAccelerationStructureKHR::setCreationBufferUnbounded (bool creationBufferUnbounded)
{
    m_creationBufferUnbounded = creationBufferUnbounded;
}

void BottomLevelAccelerationStructureKHR::setBuildFlags (const VkBuildAccelerationStructureFlagsKHR buildFlags)
{
    m_buildFlags = buildFlags;
}

void BottomLevelAccelerationStructureKHR::setBuildWithoutGeometries (bool buildWithoutGeometries)
{
    m_buildWithoutGeometries = buildWithoutGeometries;
}

void BottomLevelAccelerationStructureKHR::setBuildWithoutPrimitives (bool buildWithoutPrimitives)
{
    m_buildWithoutPrimitives = buildWithoutPrimitives;
}

void BottomLevelAccelerationStructureKHR::setDeferredOperation (const bool      deferredOperation,
                                                                const deUint32  workerThreadCount)
{
    m_deferredOperation = deferredOperation;
    m_workerThreadCount = workerThreadCount;
}

void BottomLevelAccelerationStructureKHR::setUseArrayOfPointers (const bool useArrayOfPointers)
{
    m_useArrayOfPointers = useArrayOfPointers;
}

void BottomLevelAccelerationStructureKHR::setUseMaintenance5 (const bool useMaintenance5)
{
    m_useMaintenance5 = useMaintenance5;
}

void BottomLevelAccelerationStructureKHR::setIndirectBuildParameters (const VkBuffer        indirectBuffer,
                                                                      const VkDeviceSize    indirectBufferOffset,
                                                                      const deUint32        indirectBufferStride)
{
    m_indirectBuffer        = indirectBuffer;
    m_indirectBufferOffset  = indirectBufferOffset;
    m_indirectBufferStride  = indirectBufferStride;
}

VkBuildAccelerationStructureFlagsKHR BottomLevelAccelerationStructureKHR::getBuildFlags () const
{
    return m_buildFlags;
}

void BottomLevelAccelerationStructureKHR::create (const DeviceInterface&    vk,
                                                  const VkDevice            device,
                                                  Allocator&                allocator,
                                                  VkDeviceSize              structureSize,
                                                  VkDeviceAddress           deviceAddress,
                                                  const void*               pNext,
                                                  const MemoryRequirement&  addMemoryRequirement,
                                                  const VkBuffer            creationBuffer,
                                                  const VkDeviceSize        creationBufferSize)
{
    // AS may be built from geometries using vkCmdBuildAccelerationStructuresKHR / vkBuildAccelerationStructuresKHR
    // or may be copied/compacted/deserialized from another AS (in that case it does not need geometries, but it needs to know its size before creation).
    DE_ASSERT(!m_geometriesData.empty() != !(structureSize == 0)); // logical xor

    if (structureSize == 0)
    {
        std::vector<VkAccelerationStructureGeometryKHR>                 accelerationStructureGeometriesKHR;
        std::vector<VkAccelerationStructureGeometryKHR*>                accelerationStructureGeometriesKHRPointers;
        std::vector<VkAccelerationStructureBuildRangeInfoKHR>           accelerationStructureBuildRangeInfoKHR;
        std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT> accelerationStructureGeometryMicromapsEXT;
        std::vector<deUint32>                                           maxPrimitiveCounts;
        prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers, accelerationStructureBuildRangeInfoKHR, accelerationStructureGeometryMicromapsEXT, maxPrimitiveCounts);

        const VkAccelerationStructureGeometryKHR*           accelerationStructureGeometriesKHRPointer  = accelerationStructureGeometriesKHR.data();
        const VkAccelerationStructureGeometryKHR* const*    accelerationStructureGeometry              = accelerationStructureGeometriesKHRPointers.data();

        const deUint32 geometryCount = (m_buildWithoutGeometries
                                        ? 0u
                                        : static_cast<deUint32>(accelerationStructureGeometriesKHR.size()));
        VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR =
        {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,           // VkStructureType                                      sType;
            DE_NULL,                                                                    // const void*                                          pNext;
            VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,                            // VkAccelerationStructureTypeKHR                       type;
            m_buildFlags,                                                               // VkBuildAccelerationStructureFlagsKHR                 flags;
            VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,                             // VkBuildAccelerationStructureModeKHR                  mode;
            DE_NULL,                                                                    // VkAccelerationStructureKHR                           srcAccelerationStructure;
            DE_NULL,                                                                    // VkAccelerationStructureKHR                           dstAccelerationStructure;
            geometryCount,                                                              // deUint32                                             geometryCount;
            m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer, // const VkAccelerationStructureGeometryKHR*           pGeometries;
            m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL,             // const VkAccelerationStructureGeometryKHR* const*    ppGeometries;
            makeDeviceOrHostAddressKHR(DE_NULL)                                         // VkDeviceOrHostAddressKHR                             scratchData;
        };
        VkAccelerationStructureBuildSizesInfoKHR sizeInfo =
        {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,  // VkStructureType  sType;
            DE_NULL,                                                        // const void*      pNext;
            0,                                                              // VkDeviceSize     accelerationStructureSize;
            0,                                                              // VkDeviceSize     updateScratchSize;
            0                                                               // VkDeviceSize     buildScratchSize;
        };

        vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);

        m_structureSize     = sizeInfo.accelerationStructureSize;
        m_updateScratchSize = sizeInfo.updateScratchSize;
        m_buildScratchSize  = sizeInfo.buildScratchSize;
    }
    else
    {
        m_structureSize     = structureSize;
        m_updateScratchSize = 0u;
        m_buildScratchSize  = 0u;
    }

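    // When the caller supplies a creation buffer it must be large enough for the structure;
    // otherwise a host-visible buffer with a device address is allocated below.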
    const bool externalCreationBuffer = (creationBuffer != VK_NULL_HANDLE);

    if (externalCreationBuffer)
    {
        DE_UNREF(creationBufferSize); // For release builds.
        DE_ASSERT(creationBufferSize >= m_structureSize);
    }

    if (!externalCreationBuffer)
    {
        VkBufferCreateInfo                  bufferCreateInfo    = makeBufferCreateInfo(m_structureSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
        VkBufferUsageFlags2CreateInfoKHR    bufferUsageFlags2   = vk::initVulkanStructure();

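        // With VK_KHR_maintenance5 the usage is supplied via VkBufferUsageFlags2CreateInfoKHR in the
        // pNext chain and the legacy usage field is ignored (cleared here).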
        if (m_useMaintenance5)
        {
            bufferUsageFlags2.usage = VK_BUFFER_USAGE_2_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_2_SHADER_DEVICE_ADDRESS_BIT_KHR;
            bufferCreateInfo.pNext  = &bufferUsageFlags2;
            bufferCreateInfo.usage  = 0;
        }

        const MemoryRequirement memoryRequirement   = addMemoryRequirement | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress;
        const bool              bindMemOnCreation   = (!m_creationBufferUnbounded);

        try
        {
            m_accelerationStructureBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, (MemoryRequirement::Cached | memoryRequirement), bindMemOnCreation));
        }
        catch (const tcu::NotSupportedError&)
        {
            // Retry without the Cached flag.
            m_accelerationStructureBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, memoryRequirement, bindMemOnCreation));
        }
    }

1182 const auto createInfoBuffer = (externalCreationBuffer ? creationBuffer : getAccelerationStructureBuffer()->get());
1183 const auto createInfoOffset = (externalCreationBuffer ? static_cast<VkDeviceSize>(0) : getAccelerationStructureBufferOffset());
1184 {
1185 const VkAccelerationStructureTypeKHR structureType = (m_createGeneric
1186 ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
1187 : VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
1188 const VkAccelerationStructureCreateInfoKHR accelerationStructureCreateInfoKHR
1189 {
1190 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR, // VkStructureType sType;
1191 pNext, // const void* pNext;
1192 m_createFlags, // VkAccelerationStructureCreateFlagsKHR createFlags;
1193 createInfoBuffer, // VkBuffer buffer;
1194 createInfoOffset, // VkDeviceSize offset;
1195 m_structureSize, // VkDeviceSize size;
1196 structureType, // VkAccelerationStructureTypeKHR type;
1197 deviceAddress // VkDeviceAddress deviceAddress;
1198 };
1199
1200 m_accelerationStructureKHR = createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);
1201
1202 // Make sure buffer memory is always bound after creation.
1203 if (!externalCreationBuffer)
1204 m_accelerationStructureBuffer->bindMemory();
1205 }
1206
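// Allocate scratch memory for the build: device builds need a buffer with a device
// address, while host builds only need plain host memory.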
1207 if (m_buildScratchSize > 0u)
1208 {
1209 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1210 {
1211 const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(m_buildScratchSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
1212 m_deviceScratchBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
1213 }
1214 else
1215 {
1216 m_hostScratchBuffer->resize(static_cast<size_t>(m_buildScratchSize));
1217 }
1218 }
1219
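// Device builds read geometry from buffers, so create host-visible vertex and
// (if any geometry is indexed) index buffers covering all geometries.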
1220 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR && !m_geometriesData.empty())
1221 {
1222 VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(getVertexBufferSize(m_geometriesData), VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
1223 VkBufferUsageFlags2CreateInfoKHR bufferUsageFlags2 = vk::initVulkanStructure();
1224
1225 if (m_useMaintenance5)
1226 {
1227 bufferUsageFlags2.usage = vk::VK_BUFFER_USAGE_2_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_2_SHADER_DEVICE_ADDRESS_BIT_KHR;
1228 bufferCreateInfo.pNext = &bufferUsageFlags2;
1229 bufferCreateInfo.usage = 0;
1230 }
1231
1232 const vk::MemoryRequirement memoryRequirement = MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress;
1233 m_vertexBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, memoryRequirement));
1234
1235 bufferCreateInfo.size = getIndexBufferSize(m_geometriesData);
1236 if (bufferCreateInfo.size)
1237 m_indexBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, memoryRequirement));
1238 else
1239 m_indexBuffer = de::MovePtr<BufferWithMemory>(nullptr);
1240 }
1241 }
1242
1243 void BottomLevelAccelerationStructureKHR::build (const DeviceInterface& vk,
1244 const VkDevice device,
1245 const VkCommandBuffer cmdBuffer,
1246 BottomLevelAccelerationStructure* srcAccelerationStructure)
1247 {
1248 DE_ASSERT(!m_geometriesData.empty());
1249 DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1250 DE_ASSERT(m_buildScratchSize != 0);
1251
1252 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1253 {
1254 updateVertexBuffer(vk, device, m_geometriesData, getVertexBuffer(), getVertexBufferOffset());
1255 if(getIndexBuffer() != DE_NULL)
1256 updateIndexBuffer(vk, device, m_geometriesData, getIndexBuffer(), getIndexBufferOffset());
1257 }
1258
1259 {
1260 std::vector<VkAccelerationStructureGeometryKHR> accelerationStructureGeometriesKHR;
1261 std::vector<VkAccelerationStructureGeometryKHR*> accelerationStructureGeometriesKHRPointers;
1262 std::vector<VkAccelerationStructureBuildRangeInfoKHR> accelerationStructureBuildRangeInfoKHR;
1263 std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT> accelerationStructureGeometryMicromapsEXT;
1264 std::vector<deUint32> maxPrimitiveCounts;
1265
1266 prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers,
1267 accelerationStructureBuildRangeInfoKHR, accelerationStructureGeometryMicromapsEXT, maxPrimitiveCounts, getVertexBufferOffset(), getIndexBufferOffset());
1268
1269 const VkAccelerationStructureGeometryKHR* accelerationStructureGeometriesKHRPointer = accelerationStructureGeometriesKHR.data();
1270 const VkAccelerationStructureGeometryKHR* const* accelerationStructureGeometry = accelerationStructureGeometriesKHRPointers.data();
1271 VkDeviceOrHostAddressKHR scratchData = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1272 ? makeDeviceOrHostAddressKHR(vk, device, getDeviceScratchBuffer()->get(), getDeviceScratchBufferOffset())
1273 : makeDeviceOrHostAddressKHR(getHostScratchBuffer()->data());
1274 const deUint32 geometryCount = (m_buildWithoutGeometries
1275 ? 0u
1276 : static_cast<deUint32>(accelerationStructureGeometriesKHR.size()));
1277
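// Passing a source acceleration structure requests an update (refit) instead of
// a full build.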
1278 VkAccelerationStructureKHR srcStructure = (srcAccelerationStructure != DE_NULL) ? *(srcAccelerationStructure->getPtr()) : DE_NULL;
1279 VkBuildAccelerationStructureModeKHR mode = (srcAccelerationStructure != DE_NULL) ? VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR : VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
1280
1281 VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR =
1282 {
1283 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // VkStructureType sType;
1284 DE_NULL, // const void* pNext;
1285 VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, // VkAccelerationStructureTypeKHR type;
1286 m_buildFlags, // VkBuildAccelerationStructureFlagsKHR flags;
1287 mode, // VkBuildAccelerationStructureModeKHR mode;
1288 srcStructure, // VkAccelerationStructureKHR srcAccelerationStructure;
1289 m_accelerationStructureKHR.get(), // VkAccelerationStructureKHR dstAccelerationStructure;
1290 geometryCount, // deUint32 geometryCount;
1291 m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer, // const VkAccelerationStructureGeometryKHR* pGeometries;
1292 m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL, // const VkAccelerationStructureGeometryKHR* const* ppGeometries;
1293 scratchData // VkDeviceOrHostAddressKHR scratchData;
1294 };
1295
1296 VkAccelerationStructureBuildRangeInfoKHR* accelerationStructureBuildRangeInfoKHRPtr = accelerationStructureBuildRangeInfoKHR.data();
1297
1298 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1299 {
1300 if (m_indirectBuffer == DE_NULL)
1301 vk.cmdBuildAccelerationStructuresKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
1302 else
1303 {
1304 VkDeviceAddress indirectDeviceAddress = getBufferDeviceAddress(vk, device, m_indirectBuffer, m_indirectBufferOffset);
1305 deUint32* pMaxPrimitiveCounts = maxPrimitiveCounts.data();
1306 vk.cmdBuildAccelerationStructuresIndirectKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, &indirectDeviceAddress, &m_indirectBufferStride, &pMaxPrimitiveCounts);
1307 }
1308 }
1309 else if (!m_deferredOperation)
1310 {
1311 VK_CHECK(vk.buildAccelerationStructuresKHR(device, DE_NULL, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr));
1312 }
1313 else
1314 {
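// Deferred host build: VK_OPERATION_DEFERRED_KHR means the work was handed to the
// deferred operation (and may be joined from worker threads), while
// VK_OPERATION_NOT_DEFERRED_KHR means it already completed inline.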
1315 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
1316 const auto deferredOperation = deferredOperationPtr.get();
1317
1318 VkResult result = vk.buildAccelerationStructuresKHR(device, deferredOperation, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
1319
1320 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1321
1322 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1323 }
1324 }
1325
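// Make the build results visible to any following commands that read or update
// the acceleration structure.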
1326 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1327 {
1328 const VkAccessFlags accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
1329 const VkMemoryBarrier memBarrier = makeMemoryBarrier(accessMasks, accessMasks);
1330
1331 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
1332 }
1333 }
1334
1335 void BottomLevelAccelerationStructureKHR::copyFrom (const DeviceInterface& vk,
1336 const VkDevice device,
1337 const VkCommandBuffer cmdBuffer,
1338 BottomLevelAccelerationStructure* accelerationStructure,
1339 bool compactCopy)
1340 {
1341 DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1342 DE_ASSERT(accelerationStructure != DE_NULL);
1343
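// A compacting copy writes a compacted structure (typically sized from a
// compacted-size query), whereas a clone produces an identical copy.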
1344 VkCopyAccelerationStructureInfoKHR copyAccelerationStructureInfo =
1345 {
1346 VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR, // VkStructureType sType;
1347 DE_NULL, // const void* pNext;
1348 *(accelerationStructure->getPtr()), // VkAccelerationStructureKHR src;
1349 *(getPtr()), // VkAccelerationStructureKHR dst;
1350 compactCopy ? VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR : VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR // VkCopyAccelerationStructureModeKHR mode;
1351 };
1352
1353 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1354 {
1355 vk.cmdCopyAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
1356 }
1357 else if (!m_deferredOperation)
1358 {
1359 VK_CHECK(vk.copyAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
1360 }
1361 else
1362 {
1363 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
1364 const auto deferredOperation = deferredOperationPtr.get();
1365
1366 VkResult result = vk.copyAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);
1367
1368 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1369
1370 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1371 }
1372
1373 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1374 {
1375 const VkAccessFlags accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
1376 const VkMemoryBarrier memBarrier = makeMemoryBarrier(accessMasks, accessMasks);
1377
1378 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
1379 }
1380 }
1381
1382 void BottomLevelAccelerationStructureKHR::serialize (const DeviceInterface& vk,
1383 const VkDevice device,
1384 const VkCommandBuffer cmdBuffer,
1385 SerialStorage* storage)
1386 {
1387 DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1388 DE_ASSERT(storage != DE_NULL);
1389
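// Serialization writes an opaque, driver-specific blob (prefixed with version
// information) to the destination address.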
1390 const VkCopyAccelerationStructureToMemoryInfoKHR copyAccelerationStructureInfo =
1391 {
1392 VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_INFO_KHR, // VkStructureType sType;
1393 DE_NULL, // const void* pNext;
1394 *(getPtr()), // VkAccelerationStructureKHR src;
1395 storage->getAddress(vk, device, m_buildType), // VkDeviceOrHostAddressKHR dst;
1396 VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR // VkCopyAccelerationStructureModeKHR mode;
1397 };
1398
1399 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1400 {
1401 vk.cmdCopyAccelerationStructureToMemoryKHR(cmdBuffer, &copyAccelerationStructureInfo);
1402 }
1403 else if (!m_deferredOperation)
1404 {
1405 VK_CHECK(vk.copyAccelerationStructureToMemoryKHR(device, DE_NULL, &copyAccelerationStructureInfo));
1406 }
1407 else
1408 {
1409 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
1410 const auto deferredOperation = deferredOperationPtr.get();
1411
1412 const VkResult result = vk.copyAccelerationStructureToMemoryKHR(device, deferredOperation, &copyAccelerationStructureInfo);
1413
1414 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1415
1416 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1417 }
1418 }
1419
1420 void BottomLevelAccelerationStructureKHR::deserialize (const DeviceInterface& vk,
1421 const VkDevice device,
1422 const VkCommandBuffer cmdBuffer,
1423 SerialStorage* storage)
1424 {
1425 DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1426 DE_ASSERT(storage != DE_NULL);
1427
1428 const VkCopyMemoryToAccelerationStructureInfoKHR copyAccelerationStructureInfo =
1429 {
1430 VK_STRUCTURE_TYPE_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_INFO_KHR, // VkStructureType sType;
1431 DE_NULL, // const void* pNext;
1432 storage->getAddressConst(vk, device, m_buildType), // VkDeviceOrHostAddressConstKHR src;
1433 *(getPtr()), // VkAccelerationStructureKHR dst;
1434 VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR // VkCopyAccelerationStructureModeKHR mode;
1435 };
1436
1437 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1438 {
1439 vk.cmdCopyMemoryToAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
1440 }
1441 else if (!m_deferredOperation)
1442 {
1443 VK_CHECK(vk.copyMemoryToAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
1444 }
1445 else
1446 {
1447 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
1448 const auto deferredOperation = deferredOperationPtr.get();
1449
1450 const VkResult result = vk.copyMemoryToAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);
1451
1452 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1453
1454 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1455 }
1456
1457 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1458 {
1459 const VkAccessFlags accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
1460 const VkMemoryBarrier memBarrier = makeMemoryBarrier(accessMasks, accessMasks);
1461
1462 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
1463 }
1464 }
1465
1466 const VkAccelerationStructureKHR* BottomLevelAccelerationStructureKHR::getPtr (void) const
1467 {
1468 return &m_accelerationStructureKHR.get();
1469 }
1470
1471 void BottomLevelAccelerationStructureKHR::prepareGeometries (const DeviceInterface& vk,
1472 const VkDevice device,
1473 std::vector<VkAccelerationStructureGeometryKHR>& accelerationStructureGeometriesKHR,
1474 std::vector<VkAccelerationStructureGeometryKHR*>& accelerationStructureGeometriesKHRPointers,
1475 std::vector<VkAccelerationStructureBuildRangeInfoKHR>& accelerationStructureBuildRangeInfoKHR,
1476 std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT>& accelerationStructureGeometryMicromapsEXT,
1477 std::vector<deUint32>& maxPrimitiveCounts,
1478 VkDeviceSize vertexBufferOffset,
1479 VkDeviceSize indexBufferOffset) const
1480 {
1481 accelerationStructureGeometriesKHR.resize(m_geometriesData.size());
1482 accelerationStructureGeometriesKHRPointers.resize(m_geometriesData.size());
1483 accelerationStructureBuildRangeInfoKHR.resize(m_geometriesData.size());
1484 accelerationStructureGeometryMicromapsEXT.resize(m_geometriesData.size());
1485 maxPrimitiveCounts.resize(m_geometriesData.size());
1486
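// Walk all geometries, advancing the vertex/index offsets by each geometry's
// 8-byte-aligned data size so that every geometry occupies a distinct region of
// the shared buffers.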
1487 for (size_t geometryNdx = 0; geometryNdx < m_geometriesData.size(); ++geometryNdx)
1488 {
1489 const de::SharedPtr<RaytracedGeometryBase>& geometryData = m_geometriesData[geometryNdx];
1490 VkDeviceOrHostAddressConstKHR vertexData, indexData;
1491 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1492 {
1493 if (getVertexBuffer() != DE_NULL)
1494 {
1495 vertexData = makeDeviceOrHostAddressConstKHR(vk, device, getVertexBuffer()->get(), vertexBufferOffset);
1496 if (m_indirectBuffer == DE_NULL)
1497 {
1498 vertexBufferOffset += deAlignSize(geometryData->getVertexByteSize(), 8);
1499 }
1500 }
1501 else
1502 vertexData = makeDeviceOrHostAddressConstKHR(DE_NULL);
1503
1504 if (getIndexBuffer() != DE_NULL && geometryData->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
1505 {
1506 indexData = makeDeviceOrHostAddressConstKHR(vk, device, getIndexBuffer()->get(), indexBufferOffset);
1507 indexBufferOffset += deAlignSize(geometryData->getIndexByteSize(), 8);
1508 }
1509 else
1510 indexData = makeDeviceOrHostAddressConstKHR(DE_NULL);
1511 }
1512 else
1513 {
1514 vertexData = makeDeviceOrHostAddressConstKHR(geometryData->getVertexPointer());
1515 if (geometryData->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
1516 indexData = makeDeviceOrHostAddressConstKHR(geometryData->getIndexPointer());
1517 else
1518 indexData = makeDeviceOrHostAddressConstKHR(DE_NULL);
1519 }
1520
1521 VkAccelerationStructureGeometryTrianglesDataKHR accelerationStructureGeometryTrianglesDataKHR =
1522 {
1523 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, // VkStructureType sType;
1524 DE_NULL, // const void* pNext;
1525 geometryData->getVertexFormat(), // VkFormat vertexFormat;
1526 vertexData, // VkDeviceOrHostAddressConstKHR vertexData;
1527 geometryData->getVertexStride(), // VkDeviceSize vertexStride;
1528 static_cast<deUint32>(geometryData->getVertexCount()), // uint32_t maxVertex;
1529 geometryData->getIndexType(), // VkIndexType indexType;
1530 indexData, // VkDeviceOrHostAddressConstKHR indexData;
1531 makeDeviceOrHostAddressConstKHR(DE_NULL), // VkDeviceOrHostAddressConstKHR transformData;
1532 };
1533
1534 if (geometryData->getHasOpacityMicromap())
1535 accelerationStructureGeometryTrianglesDataKHR.pNext = &geometryData->getOpacityMicromap();
1536
1537 const VkAccelerationStructureGeometryAabbsDataKHR accelerationStructureGeometryAabbsDataKHR =
1538 {
1539 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR, // VkStructureType sType;
1540 DE_NULL, // const void* pNext;
1541 vertexData, // VkDeviceOrHostAddressConstKHR data;
1542 geometryData->getAABBStride() // VkDeviceSize stride;
1543 };
1544 const VkAccelerationStructureGeometryDataKHR geometry = (geometryData->isTrianglesType())
1545 ? makeVkAccelerationStructureGeometryDataKHR(accelerationStructureGeometryTrianglesDataKHR)
1546 : makeVkAccelerationStructureGeometryDataKHR(accelerationStructureGeometryAabbsDataKHR);
1547 const VkAccelerationStructureGeometryKHR accelerationStructureGeometryKHR =
1548 {
1549 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // VkStructureType sType;
1550 DE_NULL, // const void* pNext;
1551 geometryData->getGeometryType(), // VkGeometryTypeKHR geometryType;
1552 geometry, // VkAccelerationStructureGeometryDataKHR geometry;
1553 geometryData->getGeometryFlags() // VkGeometryFlagsKHR flags;
1554 };
1555
1556 const deUint32 primitiveCount = (m_buildWithoutPrimitives ? 0u : geometryData->getPrimitiveCount());
1557
1558 const VkAccelerationStructureBuildRangeInfoKHR accelerationStructureBuildRangeInfosKHR =
1559 {
1560 primitiveCount, // deUint32 primitiveCount;
1561 0, // deUint32 primitiveOffset;
1562 0, // deUint32 firstVertex;
1563 0 // deUint32 firstTransform;
1564 };
1565
1566 accelerationStructureGeometriesKHR[geometryNdx] = accelerationStructureGeometryKHR;
1567 accelerationStructureGeometriesKHRPointers[geometryNdx] = &accelerationStructureGeometriesKHR[geometryNdx];
1568 accelerationStructureBuildRangeInfoKHR[geometryNdx] = accelerationStructureBuildRangeInfosKHR;
1569 maxPrimitiveCounts[geometryNdx] = geometryData->getPrimitiveCount();
1570 }
1571 }
1572
1573 deUint32 BottomLevelAccelerationStructure::getRequiredAllocationCount (void)
1574 {
1575 return BottomLevelAccelerationStructureKHR::getRequiredAllocationCount();
1576 }
1577
1578 void BottomLevelAccelerationStructure::createAndBuild (const DeviceInterface& vk,
1579 const VkDevice device,
1580 const VkCommandBuffer cmdBuffer,
1581 Allocator& allocator,
1582 VkDeviceAddress deviceAddress)
1583 {
1584 create(vk, device, allocator, 0u, deviceAddress);
1585 build(vk, device, cmdBuffer);
1586 }
1587
1588 void BottomLevelAccelerationStructure::createAndCopyFrom (const DeviceInterface& vk,
1589 const VkDevice device,
1590 const VkCommandBuffer cmdBuffer,
1591 Allocator& allocator,
1592 BottomLevelAccelerationStructure* accelerationStructure,
1593 VkDeviceSize compactCopySize,
1594 VkDeviceAddress deviceAddress)
1595 {
1596 DE_ASSERT(accelerationStructure != NULL);
1597 VkDeviceSize copiedSize = compactCopySize > 0u ? compactCopySize : accelerationStructure->getStructureBuildSizes().accelerationStructureSize;
1598 DE_ASSERT(copiedSize != 0u);
1599
1600 create(vk, device, allocator, copiedSize, deviceAddress);
1601 copyFrom(vk, device, cmdBuffer, accelerationStructure, compactCopySize > 0u);
1602 }
1603
1604 void BottomLevelAccelerationStructure::createAndDeserializeFrom (const DeviceInterface& vk,
1605 const VkDevice device,
1606 const VkCommandBuffer cmdBuffer,
1607 Allocator& allocator,
1608 SerialStorage* storage,
1609 VkDeviceAddress deviceAddress)
1610 {
1611 DE_ASSERT(storage != NULL);
1612 DE_ASSERT(storage->getStorageSize() >= SerialStorage::SERIAL_STORAGE_SIZE_MIN);
1613 create(vk, device, allocator, storage->getDeserializedSize(), deviceAddress);
1614 deserialize(vk, device, cmdBuffer, storage);
1615 }
1616
1617 void BottomLevelAccelerationStructureKHR::updateGeometry (size_t geometryIndex,
1618 de::SharedPtr<RaytracedGeometryBase>& raytracedGeometry)
1619 {
1620 DE_ASSERT(geometryIndex < m_geometriesData.size());
1621 m_geometriesData[geometryIndex] = raytracedGeometry;
1622 }
1623
1624 de::MovePtr<BottomLevelAccelerationStructure> makeBottomLevelAccelerationStructure ()
1625 {
1626 return de::MovePtr<BottomLevelAccelerationStructure>(new BottomLevelAccelerationStructureKHR);
1627 }
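
// Typical usage (a sketch; setGeometryData() is illustrative here, the exact
// geometry setup calls are declared in vkRayTracingUtil.hpp):
//
//     de::MovePtr<BottomLevelAccelerationStructure> blas = makeBottomLevelAccelerationStructure();
//     blas->setGeometryData(vertices, true /* triangles */);
//     blas->createAndBuild(vkd, device, cmdBuffer, allocator, 0u);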
1628
1629 // Forward declaration
1630 struct BottomLevelAccelerationStructurePoolImpl;
1631
1632 class BottomLevelAccelerationStructurePoolMember : public BottomLevelAccelerationStructureKHR
1633 {
1634 public:
1635 friend class BottomLevelAccelerationStructurePool;
1636
1637 BottomLevelAccelerationStructurePoolMember (BottomLevelAccelerationStructurePoolImpl& pool);
1638 BottomLevelAccelerationStructurePoolMember (const BottomLevelAccelerationStructurePoolMember&) = delete;
1639 BottomLevelAccelerationStructurePoolMember (BottomLevelAccelerationStructurePoolMember&&) = delete;
1640 virtual ~BottomLevelAccelerationStructurePoolMember () = default;
1641
1642 virtual void create (const DeviceInterface&,
1643 const VkDevice,
1644 Allocator&,
1645 VkDeviceSize,
1646 VkDeviceAddress,
1647 const void*,
1648 const MemoryRequirement&,
1649 const VkBuffer,
1650 const VkDeviceSize) override
1651 {
1652 DE_ASSERT(0); // This overload must not be called on pool members; the pool performs creation itself.
1653 }
1654 virtual auto computeBuildSize (const DeviceInterface& vk,
1655 const VkDevice device,
1656 const VkDeviceSize strSize) const
1657 // accStrSize, updateScratch, buildScratch, vertexSize, indexSize
1658 -> std::tuple<VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize>;
1659 protected:
1660 struct Info;
1661 virtual void preCreateSetSizesAndOffsets (const Info& info,
1662 const VkDeviceSize accStrSize,
1663 const VkDeviceSize updateScratchSize,
1664 const VkDeviceSize buildScratchSize);
1665 virtual void createAccellerationStructure (const DeviceInterface& vk,
1666 const VkDevice device,
1667 VkDeviceAddress deviceAddress);
1668
1669 virtual BufferWithMemory* getAccelerationStructureBuffer () const override;
1670 virtual BufferWithMemory* getDeviceScratchBuffer () const override;
1671 virtual std::vector<deUint8>* getHostScratchBuffer () const override;
1672 virtual BufferWithMemory* getVertexBuffer () const override;
1673 virtual BufferWithMemory* getIndexBuffer () const override;
1674
1675 virtual VkDeviceSize getAccelerationStructureBufferOffset () const override { return m_info.accStrOffset; }
1676 virtual VkDeviceSize getDeviceScratchBufferOffset () const override { return m_info.buildScratchBuffOffset; }
1677 virtual VkDeviceSize getVertexBufferOffset () const override { return m_info.vertBuffOffset; }
1678 virtual VkDeviceSize getIndexBufferOffset () const override { return m_info.indexBuffOffset; }
1679
1680 BottomLevelAccelerationStructurePoolImpl& m_pool;
1681
1682 struct Info
1683 {
1684 deUint32 accStrIndex;
1685 VkDeviceSize accStrOffset;
1686 deUint32 vertBuffIndex;
1687 VkDeviceSize vertBuffOffset;
1688 deUint32 indexBuffIndex;
1689 VkDeviceSize indexBuffOffset;
1690 deUint32 buildScratchBuffIndex;
1691 VkDeviceSize buildScratchBuffOffset;
1692 } m_info;
1693 };
1694
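// negz() returns the all-ones value of an integer type, used below as an
// "invalid index" sentinel; isnegz() tests for that sentinel.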
1695 template<class X> inline X negz (const X&)
1696 {
1697 return (~static_cast<X>(0));
1698 }
1699 template<class X> inline bool isnegz (const X& x)
1700 {
1701 return x == negz(x);
1702 }
1703 template<class Y> inline auto make_unsigned(const Y& y) -> typename std::make_unsigned<Y>::type
1704 {
1705 return static_cast<typename std::make_unsigned<Y>::type>(y);
1706 }
1707
1708 BottomLevelAccelerationStructurePoolMember::BottomLevelAccelerationStructurePoolMember (BottomLevelAccelerationStructurePoolImpl& pool)
1709 : m_pool (pool)
1710 , m_info {}
1711 {
1712 }
1713
1714 struct BottomLevelAccelerationStructurePoolImpl
1715 {
1716 BottomLevelAccelerationStructurePoolImpl (BottomLevelAccelerationStructurePoolImpl&&) = delete;
1717 BottomLevelAccelerationStructurePoolImpl (const BottomLevelAccelerationStructurePoolImpl&) = delete;
1718 BottomLevelAccelerationStructurePoolImpl (BottomLevelAccelerationStructurePool& pool);
1719
1720 BottomLevelAccelerationStructurePool& m_pool;
1721 std::vector<de::SharedPtr<BufferWithMemory>> m_accellerationStructureBuffers;
1722 de::SharedPtr<BufferWithMemory> m_deviceScratchBuffer;
1723 de::UniquePtr<std::vector<deUint8>> m_hostScratchBuffer;
1724 std::vector<de::SharedPtr<BufferWithMemory>> m_vertexBuffers;
1725 std::vector<de::SharedPtr<BufferWithMemory>> m_indexBuffers;
1726 };
1727 BottomLevelAccelerationStructurePoolImpl::BottomLevelAccelerationStructurePoolImpl (BottomLevelAccelerationStructurePool& pool)
1728 : m_pool (pool)
1729 , m_accellerationStructureBuffers ()
1730 , m_deviceScratchBuffer ()
1731 , m_hostScratchBuffer (new std::vector<deUint8>)
1732 , m_vertexBuffers ()
1733 , m_indexBuffers ()
1734 {
1735 }
1736 BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getAccelerationStructureBuffer () const
1737 {
1738 BufferWithMemory* result = nullptr;
1739 if (m_pool.m_accellerationStructureBuffers.size())
1740 {
1741 DE_ASSERT(!isnegz(m_info.accStrIndex));
1742 result = m_pool.m_accellerationStructureBuffers[m_info.accStrIndex].get();
1743 }
1744 return result;
1745 }
1746 BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getDeviceScratchBuffer () const
1747 {
1748 DE_ASSERT(m_info.buildScratchBuffIndex == 0);
1749 return m_pool.m_deviceScratchBuffer.get();
1750 }
1751 std::vector<deUint8>* BottomLevelAccelerationStructurePoolMember::getHostScratchBuffer () const
1752 {
1753 return this->m_buildScratchSize ? m_pool.m_hostScratchBuffer.get() : nullptr;
1754 }
1755
1756 BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getVertexBuffer () const
1757 {
1758 BufferWithMemory* result = nullptr;
1759 if (m_pool.m_vertexBuffers.size())
1760 {
1761 DE_ASSERT(!isnegz(m_info.vertBuffIndex));
1762 result = m_pool.m_vertexBuffers[m_info.vertBuffIndex].get();
1763 }
1764 return result;
1765 }
1766 BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getIndexBuffer () const
1767 {
1768 BufferWithMemory* result = nullptr;
1769 if (m_pool.m_indexBuffers.size())
1770 {
1771 DE_ASSERT(!isnegz(m_info.indexBuffIndex));
1772 result = m_pool.m_indexBuffers[m_info.indexBuffIndex].get();
1773 }
1774 return result;
1775 }
1776
1777 struct BottomLevelAccelerationStructurePool::Impl : BottomLevelAccelerationStructurePoolImpl
1778 {
1779 friend class BottomLevelAccelerationStructurePool;
1780 friend class BottomLevelAccelerationStructurePoolMember;
1781
1782 Impl (BottomLevelAccelerationStructurePool& pool)
1783 : BottomLevelAccelerationStructurePoolImpl(pool) { }
1784 };
1785
1786 BottomLevelAccelerationStructurePool::BottomLevelAccelerationStructurePool ()
1787 : m_batchStructCount (4)
1788 , m_batchGeomCount (0)
1789 , m_infos ()
1790 , m_structs ()
1791 , m_createOnce (false)
1792 , m_tryCachedMemory (true)
1793 , m_structsBuffSize (0)
1794 , m_updatesScratchSize (0)
1795 , m_buildsScratchSize (0)
1796 , m_verticesSize (0)
1797 , m_indicesSize (0)
1798 , m_impl (new Impl(*this))
1799 {
1800 }
1801
1802 BottomLevelAccelerationStructurePool::~BottomLevelAccelerationStructurePool()
1803 {
1804 delete m_impl;
1805 }
1806
1807 void BottomLevelAccelerationStructurePool::batchStructCount (const deUint32& value)
1808 {
1809 DE_ASSERT(value >= 1); m_batchStructCount = value;
1810 }
1811
1812 auto BottomLevelAccelerationStructurePool::add (VkDeviceSize structureSize,
1813 VkDeviceAddress deviceAddress) -> BottomLevelAccelerationStructurePool::BlasPtr
1814 {
1815 // Prevent adding new structures after batchCreate() has been called.
1816 if (m_createOnce) DE_ASSERT(0);
1817
1818 auto blas = new BottomLevelAccelerationStructurePoolMember(*m_impl);
1819 m_infos.push_back({structureSize, deviceAddress});
1820 m_structs.emplace_back(blas);
1821 return m_structs.back();
1822 }
1823
1824 void adjustBatchCount (const DeviceInterface& vkd,
1825 const VkDevice device,
1826 const std::vector<BottomLevelAccelerationStructurePool::BlasPtr>& structs,
1827 const std::vector<BottomLevelAccelerationStructurePool::BlasInfo>& infos,
1828 const VkDeviceSize maxBufferSize,
1829 deUint32 (&result)[4])
1830 {
1831 tcu::Vector<VkDeviceSize, 4> sizes(0);
1832 tcu::Vector<VkDeviceSize, 4> sums(0);
1833 tcu::Vector<deUint32, 4> tmps(0);
1834 tcu::Vector<deUint32, 4> batches(0);
1835
1836 VkDeviceSize updateScratchSize = 0; static_cast<void>(updateScratchSize); // not used yet; reserved for a future implementation
1837
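// Greedy packing: keep accumulating sizes into the current batch while the running
// sum still fits in maxBufferSize, and track the largest batch seen per category
// (0: structures, 1: build scratch, 2: vertices, 3: indices).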
1838 auto updateIf = [&](deUint32 c)
1839 {
1840 if (sums[c] + sizes[c] <= maxBufferSize)
1841 {
1842 sums[c] += sizes[c];
1843 tmps[c] += 1;
1844
1845 batches[c] = std::max(tmps[c], batches[c]);
1846 }
1847 else
1848 {
1849 sums[c] = 0;
1850 tmps[c] = 0;
1851 }
1852 };
1853
1854 const deUint32 maxIter = static_cast<deUint32>(structs.size());
1855 for (deUint32 i = 0; i < maxIter; ++i)
1856 {
1857 auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(structs[i].get());
1858 std::tie(sizes[0], updateScratchSize, sizes[1], sizes[2], sizes[3]) = str.computeBuildSize(vkd, device, infos[i].structureSize);
1859
1860 updateIf(0);
1861 updateIf(1);
1862 updateIf(2);
1863 updateIf(3);
1864 }
1865
1866 result[0] = std::max(batches[0], 1u);
1867 result[1] = std::max(batches[1], 1u);
1868 result[2] = std::max(batches[2], 1u);
1869 result[3] = std::max(batches[3], 1u);
1870 }
1871
1872 size_t BottomLevelAccelerationStructurePool::getAllocationCount () const
1873 {
1874 return m_impl->m_accellerationStructureBuffers.size()
1875 + m_impl->m_vertexBuffers.size()
1876 + m_impl->m_indexBuffers.size()
1877 + 1 /* for scratch buffer */;
1878 }
1879
1880 size_t BottomLevelAccelerationStructurePool::getAllocationCount (const DeviceInterface& vk,
1881 const VkDevice device,
1882 const VkDeviceSize maxBufferSize) const
1883 {
1884 DE_ASSERT(m_structs.size() != 0);
1885
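// Simulate the batching performed by batchCreateAdjust() to predict how many
// buffer allocations the pool would make for the given maxBufferSize.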
1886 std::map<deUint32, VkDeviceSize> accStrSizes;
1887 std::map<deUint32, VkDeviceSize> vertBuffSizes;
1888 std::map<deUint32, VkDeviceSize> indexBuffSizes;
1889 std::map<deUint32, VkDeviceSize> scratchBuffSizes;
1890
1891 const deUint32 allStructsCount = structCount();
1892
1893 deUint32 batchStructCount = m_batchStructCount;
1894 deUint32 batchScratchCount = m_batchStructCount;
1895 deUint32 batchVertexCount = m_batchGeomCount ? m_batchGeomCount : m_batchStructCount;
1896 deUint32 batchIndexCount = batchVertexCount;
1897
1898 if (!isnegz(maxBufferSize))
1899 {
1900 deUint32 batches[4];
1901 adjustBatchCount(vk, device, m_structs, m_infos, maxBufferSize, batches);
1902 batchStructCount = batches[0];
1903 batchScratchCount = batches[1];
1904 batchVertexCount = batches[2];
1905 batchIndexCount = batches[3];
1906 }
1907
1908 deUint32 iStr = 0;
1909 deUint32 iScratch = 0;
1910 deUint32 iVertex = 0;
1911 deUint32 iIndex = 0;
1912
1913 VkDeviceSize strSize = 0;
1914 VkDeviceSize updateScratchSize = 0;
1915 VkDeviceSize buildScratchSize = 0;
1916 VkDeviceSize vertexSize = 0;
1917 VkDeviceSize indexSize = 0;
1918
1919 for (; iStr < allStructsCount; ++iStr)
1920 {
1921 auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[iStr].get());
1922 std::tie(strSize, updateScratchSize, buildScratchSize, vertexSize, indexSize) = str.computeBuildSize(vk, device, m_infos[iStr].structureSize);
1923
1924 {
1925 const VkDeviceSize alignedStrSize = deAlign64(strSize, 256);
1926 const deUint32 accStrIndex = (iStr / batchStructCount);
1927 accStrSizes[accStrIndex] += alignedStrSize;
1928 }
1929
1930 if (buildScratchSize != 0)
1931 {
1932 const VkDeviceSize alignedBuildScratchSize = deAlign64(buildScratchSize, 256);
1933 const deUint32 scratchBuffIndex = (iScratch / batchScratchCount);
1934 scratchBuffSizes[scratchBuffIndex] += alignedBuildScratchSize;
1935 iScratch += 1;
1936 }
1937
1938 if (vertexSize != 0)
1939 {
1940 const VkDeviceSize alignedVertBuffSize = deAlign64(vertexSize, 8);
1941 const deUint32 vertBuffIndex = (iVertex / batchVertexCount);
1942 vertBuffSizes[vertBuffIndex] += alignedVertBuffSize;
1943 iVertex += 1;
1944 }
1945
1946 if (indexSize != 0)
1947 {
1948 const VkDeviceSize alignedIndexBuffSize = deAlign64(indexSize, 8);
1949 const deUint32 indexBuffIndex = (iIndex / batchIndexCount);
1950 indexBuffSizes[indexBuffIndex] += alignedIndexBuffSize;
1951 iIndex += 1;
1952 }
1953 }
1954
1955 return accStrSizes.size()
1956 + vertBuffSizes.size()
1957 + indexBuffSizes.size()
1958 + scratchBuffSizes.size();
1959 }
1960
1961 tcu::Vector<VkDeviceSize, 4> BottomLevelAccelerationStructurePool::getAllocationSizes (const DeviceInterface& vk,
1962 const VkDevice device) const
1963 {
1964 if (m_structsBuffSize)
1965 {
1966 return tcu::Vector<VkDeviceSize, 4>(m_structsBuffSize, m_buildsScratchSize, m_verticesSize, m_indicesSize);
1967 }
1968
1969 VkDeviceSize strSize = 0;
1970 VkDeviceSize updateScratchSize = 0; static_cast<void>(updateScratchSize); // not used yet; reserved for a future implementation
1971 VkDeviceSize buildScratchSize = 0;
1972 VkDeviceSize vertexSize = 0;
1973 VkDeviceSize indexSize = 0;
1974 VkDeviceSize sumStrSize = 0;
1975 VkDeviceSize sumUpdateScratchSize = 0; static_cast<void>(sumUpdateScratchSize); // not used yet; reserved for a future implementation
1976 VkDeviceSize sumBuildScratchSize = 0;
1977 VkDeviceSize sumVertexSize = 0;
1978 VkDeviceSize sumIndexSize = 0;
1979 for (size_t i = 0; i < structCount(); ++i)
1980 {
1981 auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[i].get());
1982 std::tie(strSize, updateScratchSize, buildScratchSize, vertexSize, indexSize) = str.computeBuildSize(vk, device, m_infos[i].structureSize);
1983 sumStrSize += deAlign64(strSize, 256);
1984 //sumUpdateScratchSize += deAlign64(updateScratchSize, 256); // not used yet; reserved for a future implementation
1985 sumBuildScratchSize += deAlign64(buildScratchSize, 256);
1986 sumVertexSize += deAlign64(vertexSize, 8);
1987 sumIndexSize += deAlign64(indexSize, 8);
1988 }
1989 return tcu::Vector<VkDeviceSize, 4>(sumStrSize, sumBuildScratchSize, sumVertexSize, sumIndexSize);
1990 }
1991
1992 void BottomLevelAccelerationStructurePool::batchCreate (const DeviceInterface& vkd,
1993 const VkDevice device,
1994 Allocator& allocator)
1995 {
1996 batchCreateAdjust(vkd, device, allocator, negz<VkDeviceSize>(0));
1997 }
1998
1999 void BottomLevelAccelerationStructurePool::batchCreateAdjust (const DeviceInterface& vkd,
2000 const VkDevice device,
2001 Allocator& allocator,
2002 const VkDeviceSize maxBufferSize)
2003 {
2004 // Prevent this method from being called more than once.
2005 if (m_createOnce) DE_ASSERT(0);
2006
2007 m_createOnce = true;
2008 DE_ASSERT(m_structs.size() != 0);
2009
2010 auto createAccellerationStructureBuffer = [&](VkDeviceSize bufferSize) -> typename std::add_pointer<BufferWithMemory>::type
2011 {
2012 BufferWithMemory* res = nullptr;
2013 const VkBufferCreateInfo bci = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
2014
2015 if (m_tryCachedMemory) try
2016 {
2017 res = new BufferWithMemory(vkd, device, allocator, bci, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
2018 }
2019 catch (const tcu::NotSupportedError&)
2020 {
2021 res = nullptr;
2022 }
2023
2024 return (nullptr != res)
2025 ? res
2026 : (new BufferWithMemory(vkd, device, allocator, bci, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
2027 };
2028
2029 auto createDeviceScratchBuffer = [&](VkDeviceSize bufferSize) -> de::SharedPtr<BufferWithMemory>
2030 {
2031 const VkBufferCreateInfo bci = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
2032 BufferWithMemory* p = new BufferWithMemory(vkd, device, allocator, bci, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
2033 return de::SharedPtr<BufferWithMemory>(p);
2034 };
2035
2036 std::map<deUint32, VkDeviceSize> accStrSizes;
2037 std::map<deUint32, VkDeviceSize> vertBuffSizes;
2038 std::map<deUint32, VkDeviceSize> indexBuffSizes;
2039
2040 const deUint32 allStructsCount = structCount();
2041 deUint32 iterKey = 0;
2042
2043 deUint32 batchStructCount = m_batchStructCount;
2044 deUint32 batchVertexCount = m_batchGeomCount ? m_batchGeomCount : m_batchStructCount;
2045 deUint32 batchIndexCount = batchVertexCount;
2046
2047 if (!isnegz(maxBufferSize))
2048 {
2049 deUint32 batches[4];
2050 adjustBatchCount(vkd, device, m_structs, m_infos, maxBufferSize, batches);
2051 batchStructCount = batches[0];
2052 // batches[1]: batchScratchCount
2053 batchVertexCount = batches[2];
2054 batchIndexCount = batches[3];
2055 }
2056
2057 deUint32 iStr = 0;
2058 deUint32 iVertex = 0;
2059 deUint32 iIndex = 0;
2060
2061 VkDeviceSize strSize = 0;
2062 VkDeviceSize updateScratchSize = 0;
2063 VkDeviceSize buildScratchSize = 0;
2064 VkDeviceSize maxBuildScratchSize = 0;
2065 VkDeviceSize vertexSize = 0;
2066 VkDeviceSize indexSize = 0;
2067
2068 VkDeviceSize strOffset = 0;
2069 VkDeviceSize vertexOffset = 0;
2070 VkDeviceSize indexOffset = 0;
2071
2072 deUint32 hostStructCount = 0;
2073 deUint32 deviceStructCount = 0;
2074
2075 for (; iStr < allStructsCount; ++iStr)
2076 {
2077 BottomLevelAccelerationStructurePoolMember::Info info{};
2078 auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[iStr].get());
2079 std::tie(strSize, updateScratchSize, buildScratchSize, vertexSize, indexSize) = str.computeBuildSize(vkd, device, m_infos[iStr].structureSize);
2080
2081 ++(str.getBuildType() == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR ? hostStructCount : deviceStructCount);
2082
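// Acceleration structure offsets within a buffer must be a multiple of 256 bytes,
// so each member's region inside the shared buffer is aligned accordingly.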
2083 {
2084 const VkDeviceSize alignedStrSize = deAlign64(strSize, 256);
2085 const deUint32 accStrIndex = (iStr / batchStructCount);
2086 if (iStr != 0 && (iStr % batchStructCount) == 0)
2087 {
2088 strOffset = 0;
2089 }
2090
2091 info.accStrIndex = accStrIndex;
2092 info.accStrOffset = strOffset;
2093 accStrSizes[accStrIndex] += alignedStrSize;
2094 strOffset += alignedStrSize;
2095 m_structsBuffSize += alignedStrSize;
2096 }
2097
2098 if (buildScratchSize != 0)
2099 {
2100 maxBuildScratchSize = std::max(maxBuildScratchSize, make_unsigned(deAlign64(buildScratchSize, 256u)));
2101
2102 info.buildScratchBuffIndex = 0;
2103 info.buildScratchBuffOffset = 0;
2104 }
2105
2106 if (vertexSize != 0)
2107 {
2108 const VkDeviceSize alignedVertBuffSize = deAlign64(vertexSize, 8);
2109 const deUint32 vertBuffIndex = (iVertex / batchVertexCount);
2110 if (iVertex != 0 && (iVertex % batchVertexCount) == 0)
2111 {
2112 vertexOffset = 0;
2113 }
2114
2115 info.vertBuffIndex = vertBuffIndex;
2116 info.vertBuffOffset = vertexOffset;
2117 vertBuffSizes[vertBuffIndex] += alignedVertBuffSize;
2118 vertexOffset += alignedVertBuffSize;
2119 m_verticesSize += alignedVertBuffSize;
2120 iVertex += 1;
2121 }
2122
2123 if (indexSize != 0)
2124 {
2125 const VkDeviceSize alignedIndexBuffSize = deAlign64(indexSize, 8);
2126 const deUint32 indexBuffIndex = (iIndex / batchIndexCount);
2127 if (iIndex != 0 && (iIndex % batchIndexCount) == 0)
2128 {
2129 indexOffset = 0;
2130 }
2131
2132 info.indexBuffIndex = indexBuffIndex;
2133 info.indexBuffOffset = indexOffset;
2134 indexBuffSizes[indexBuffIndex] += alignedIndexBuffSize;
2135 indexOffset += alignedIndexBuffSize;
2136 m_indicesSize += alignedIndexBuffSize;
2137 iIndex += 1;
2138 }
2139
2140 str.preCreateSetSizesAndOffsets(info, strSize, updateScratchSize, buildScratchSize);
2141 }
2142
2143 for (iterKey = 0; iterKey < static_cast<deUint32>(accStrSizes.size()); ++iterKey)
2144 {
2145 m_impl->m_accellerationStructureBuffers.emplace_back(createAccellerationStructureBuffer(accStrSizes.at(iterKey)));
2146 }
2147 for (iterKey = 0; iterKey < static_cast<deUint32>(vertBuffSizes.size()); ++iterKey)
2148 {
2149 m_impl->m_vertexBuffers.emplace_back(createVertexBuffer(vkd, device, allocator, vertBuffSizes.at(iterKey)));
2150 }
2151 for (iterKey = 0; iterKey < static_cast<deUint32>(indexBuffSizes.size()); ++iterKey)
2152 {
2153 m_impl->m_indexBuffers.emplace_back(createIndexBuffer(vkd, device, allocator, indexBuffSizes.at(iterKey)));
2154 }
2155
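// A single scratch buffer sized for the largest build is shared by all pool
// members; every member builds at scratch offset 0.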
2156 if (maxBuildScratchSize)
2157 {
2158 if (hostStructCount) m_impl->m_hostScratchBuffer->resize(static_cast<size_t>(maxBuildScratchSize));
2159 if (deviceStructCount) m_impl->m_deviceScratchBuffer = createDeviceScratchBuffer(maxBuildScratchSize);
2160
2161 m_buildsScratchSize = maxBuildScratchSize;
2162 }
2163
2164 for (iterKey = 0; iterKey < allStructsCount; ++iterKey)
2165 {
2166 auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[iterKey].get());
2167 str.createAccellerationStructure(vkd, device, m_infos[iterKey].deviceAddress);
2168 }
2169 }
2170
2171 void BottomLevelAccelerationStructurePool::batchBuild (const DeviceInterface& vk,
2172 const VkDevice device,
2173 VkCommandBuffer cmdBuffer)
2174 {
2175 for (const auto& str : m_structs)
2176 {
2177 str->build(vk, device, cmdBuffer);
2178 }
2179 }
2180
2181 void BottomLevelAccelerationStructurePool::batchBuild (const DeviceInterface& vk,
2182 const VkDevice device,
2183 VkCommandPool cmdPool,
2184 VkQueue queue,
2185 qpWatchDog* watchDog)
2186 {
2187 const deUint32 limit = 10000u;
2188 const deUint32 count = structCount();
2189 std::vector<BlasPtr> buildingOnDevice;
2190
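// Host builds are performed immediately; device builds are recorded in batches of
// at most 'limit' structures per command buffer submission, touching the watchdog
// periodically so long-running cases are not reported as hung.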
2191 auto buildOnDevice = [&]() -> void
2192 {
2193 Move<VkCommandBuffer> cmd = allocateCommandBuffer(vk, device, cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2194
2195 beginCommandBuffer(vk, *cmd, 0u);
2196 for (const auto& str : buildingOnDevice)
2197 str->build(vk, device, *cmd);
2198 endCommandBuffer(vk, *cmd);
2199
2200 submitCommandsAndWait(vk, device, queue, *cmd);
2201 vk.resetCommandPool(device, cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
2202 };
2203
2204 buildingOnDevice.reserve(limit);
2205 for (deUint32 i = 0; i < count; ++i)
2206 {
2207 auto str = m_structs[i];
2208
2209 if (str->getBuildType() == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR)
2210 str->build(vk, device, DE_NULL);
2211 else
2212 buildingOnDevice.emplace_back(str);
2213
2214 if (buildingOnDevice.size() == limit || (count - 1) == i)
2215 {
2216 buildOnDevice();
2217 buildingOnDevice.clear();
2218 }
2219
2220 if ((i % WATCHDOG_INTERVAL) == 0 && watchDog)
2221 qpWatchDog_touch(watchDog);
2222 }
2223 }
2224
2225 auto BottomLevelAccelerationStructurePoolMember::computeBuildSize (const DeviceInterface& vk,
2226 const VkDevice device,
2227 const VkDeviceSize strSize) const
2228 // accStrSize, updateScratch, buildScratch, vertexSize, indexSize
2229 -> std::tuple<VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize>
2230 {
2231 DE_ASSERT(!m_geometriesData.empty() != !(strSize == 0)); // logical XOR: either geometry data or an explicit size, not both
2232
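// When only an explicit size is given (e.g. a structure that will be a copy or
// deserialization target), build sizes cannot be queried, so return the aligned
// size with zero scratch, vertex and index requirements.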
2233 std::tuple<VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize> result(deAlign64(strSize, 256), 0, 0, 0, 0);
2234
2235 if (!m_geometriesData.empty())
2236 {
2237 std::vector<VkAccelerationStructureGeometryKHR> accelerationStructureGeometriesKHR;
2238 std::vector<VkAccelerationStructureGeometryKHR*> accelerationStructureGeometriesKHRPointers;
2239 std::vector<VkAccelerationStructureBuildRangeInfoKHR> accelerationStructureBuildRangeInfoKHR;
2240 std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT> accelerationStructureGeometryMicromapsEXT;
2241 std::vector<deUint32> maxPrimitiveCounts;
2242 prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers, accelerationStructureBuildRangeInfoKHR, accelerationStructureGeometryMicromapsEXT, maxPrimitiveCounts);
2243
2244 const VkAccelerationStructureGeometryKHR* accelerationStructureGeometriesKHRPointer = accelerationStructureGeometriesKHR.data();
2245 const VkAccelerationStructureGeometryKHR* const* accelerationStructureGeometry = accelerationStructureGeometriesKHRPointers.data();
2246
2247 VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR =
2248 {
2249 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // VkStructureType sType;
2250 DE_NULL, // const void* pNext;
2251 VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, // VkAccelerationStructureTypeKHR type;
2252 m_buildFlags, // VkBuildAccelerationStructureFlagsKHR flags;
2253 VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR, // VkBuildAccelerationStructureModeKHR mode;
2254 DE_NULL, // VkAccelerationStructureKHR srcAccelerationStructure;
2255 DE_NULL, // VkAccelerationStructureKHR dstAccelerationStructure;
2256 static_cast<deUint32>(accelerationStructureGeometriesKHR.size()), // deUint32 geometryCount;
2257 m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer, // const VkAccelerationStructureGeometryKHR* pGeometries;
2258 m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL, // const VkAccelerationStructureGeometryKHR* const* ppGeometries;
2259 makeDeviceOrHostAddressKHR(DE_NULL) // VkDeviceOrHostAddressKHR scratchData;
2260 };
2261
2262 VkAccelerationStructureBuildSizesInfoKHR sizeInfo =
2263 {
2264 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, // VkStructureType sType;
2265 DE_NULL, // const void* pNext;
2266 0, // VkDeviceSize accelerationStructureSize;
2267 0, // VkDeviceSize updateScratchSize;
2268 0 // VkDeviceSize buildScratchSize;
2269 };
2270
2271 vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);
2272
2273 std::get<0>(result) = sizeInfo.accelerationStructureSize;
2274 std::get<1>(result) = sizeInfo.updateScratchSize;
2275 std::get<2>(result) = sizeInfo.buildScratchSize;
2276 std::get<3>(result) = getVertexBufferSize(m_geometriesData);
2277 std::get<4>(result) = getIndexBufferSize(m_geometriesData);
2278 }
2279
2280 return result;
2281 }
2282
2283 void BottomLevelAccelerationStructurePoolMember::preCreateSetSizesAndOffsets (const Info& info,
2284 const VkDeviceSize accStrSize,
2285 const VkDeviceSize updateScratchSize,
2286 const VkDeviceSize buildScratchSize)
2287 {
2288 m_info = info;
2289 m_structureSize = accStrSize;
2290 m_updateScratchSize = updateScratchSize;
2291 m_buildScratchSize = buildScratchSize;
2292 }
2293
2294 void BottomLevelAccelerationStructurePoolMember::createAccellerationStructure (const DeviceInterface& vk,
2295 const VkDevice device,
2296 VkDeviceAddress deviceAddress)
2297 {
2298 const VkAccelerationStructureTypeKHR structureType = (m_createGeneric
2299 ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
2300 : VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
2301 const VkAccelerationStructureCreateInfoKHR accelerationStructureCreateInfoKHR
2302 {
2303 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR, // VkStructureType sType;
2304 DE_NULL, // const void* pNext;
2305 m_createFlags, // VkAccelerationStructureCreateFlagsKHR createFlags;
2306 getAccelerationStructureBuffer()->get(), // VkBuffer buffer;
2307 getAccelerationStructureBufferOffset(), // VkDeviceSize offset;
2308 m_structureSize, // VkDeviceSize size;
2309 structureType, // VkAccelerationStructureTypeKHR type;
2310 deviceAddress // VkDeviceAddress deviceAddress;
2311 };
2312
2313 m_accelerationStructureKHR = createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);
2314 }
2315
2316 TopLevelAccelerationStructure::~TopLevelAccelerationStructure ()
2317 {
2318 }
2319
2320 TopLevelAccelerationStructure::TopLevelAccelerationStructure ()
2321 : m_structureSize (0u)
2322 , m_updateScratchSize (0u)
2323 , m_buildScratchSize (0u)
2324 {
2325 }
2326
2327 void TopLevelAccelerationStructure::setInstanceCount (const size_t instanceCount)
2328 {
2329 m_bottomLevelInstances.reserve(instanceCount);
2330 m_instanceData.reserve(instanceCount);
2331 }
2332
2333 void TopLevelAccelerationStructure::addInstance (de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelStructure,
2334 const VkTransformMatrixKHR& matrix,
2335 deUint32 instanceCustomIndex,
2336 deUint32 mask,
2337 deUint32 instanceShaderBindingTableRecordOffset,
2338 VkGeometryInstanceFlagsKHR flags)
2339 {
2340 m_bottomLevelInstances.push_back(bottomLevelStructure);
2341 m_instanceData.push_back(InstanceData(matrix, instanceCustomIndex, mask, instanceShaderBindingTableRecordOffset, flags));
2342 }
2343
2344 VkAccelerationStructureBuildSizesInfoKHR TopLevelAccelerationStructure::getStructureBuildSizes () const
2345 {
2346 return
2347 {
2348 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, // VkStructureType sType;
2349 DE_NULL, // const void* pNext;
2350 m_structureSize, // VkDeviceSize accelerationStructureSize;
2351 m_updateScratchSize, // VkDeviceSize updateScratchSize;
2352 m_buildScratchSize // VkDeviceSize buildScratchSize;
2353 };
2354 }
2355
2356 void TopLevelAccelerationStructure::createAndBuild (const DeviceInterface& vk,
2357 const VkDevice device,
2358 const VkCommandBuffer cmdBuffer,
2359 Allocator& allocator,
2360 VkDeviceAddress deviceAddress)
2361 {
2362 create(vk, device, allocator, 0u, deviceAddress);
2363 build(vk, device, cmdBuffer);
2364 }
2365
2366 void TopLevelAccelerationStructure::createAndCopyFrom (const DeviceInterface& vk,
2367 const VkDevice device,
2368 const VkCommandBuffer cmdBuffer,
2369 Allocator& allocator,
2370 TopLevelAccelerationStructure* accelerationStructure,
2371 VkDeviceSize compactCopySize,
2372 VkDeviceAddress deviceAddress)
2373 {
2374 DE_ASSERT(accelerationStructure != NULL);
2375 VkDeviceSize copiedSize = compactCopySize > 0u ? compactCopySize : accelerationStructure->getStructureBuildSizes().accelerationStructureSize;
2376 DE_ASSERT(copiedSize != 0u);
2377
2378 create(vk, device, allocator, copiedSize, deviceAddress);
2379 copyFrom(vk, device, cmdBuffer, accelerationStructure, compactCopySize > 0u);
2380 }
2381
2382 void TopLevelAccelerationStructure::createAndDeserializeFrom (const DeviceInterface& vk,
2383 const VkDevice device,
2384 const VkCommandBuffer cmdBuffer,
2385 Allocator& allocator,
2386 SerialStorage* storage,
2387 VkDeviceAddress deviceAddress)
2388 {
2389 DE_ASSERT(storage != NULL);
2390 DE_ASSERT(storage->getStorageSize() >= SerialStorage::SERIAL_STORAGE_SIZE_MIN);
2391 create(vk, device, allocator, storage->getDeserializedSize(), deviceAddress);
2392 if (storage->hasDeepFormat()) createAndDeserializeBottoms(vk, device, cmdBuffer, allocator, storage);
2393 deserialize(vk, device, cmdBuffer, storage);
2394 }
2395
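// Creates a host-visible buffer holding one VkAccelerationStructureInstanceKHR per
// bottom-level instance, preferring cached memory when it is available.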
2396 BufferWithMemory* createInstanceBuffer (const DeviceInterface& vk,
2397 const VkDevice device,
2398 Allocator& allocator,
2399 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> > bottomLevelInstances,
2400 std::vector<InstanceData> instanceData,
2401 const bool tryCachedMemory)
2402 {
2403 DE_ASSERT(bottomLevelInstances.size() != 0);
2404 DE_ASSERT(bottomLevelInstances.size() == instanceData.size());
2405 DE_UNREF(instanceData);
2406
2407 BufferWithMemory* result = nullptr;
2408 const VkDeviceSize bufferSizeBytes = bottomLevelInstances.size() * sizeof(VkAccelerationStructureInstanceKHR);
2409 const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
2410 if (tryCachedMemory) try
2411 {
2412 result = new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
2413 }
2414 catch (const tcu::NotSupportedError&)
2415 {
2416 result = nullptr;
2417 }
2418 return result
2419 ? result
2420 : new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
2421 }
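
// The try/catch above is a best-effort memory preference: cached host-visible memory is
// requested first and, if the allocator reports it as unsupported, allocation silently
// falls back to plain host-visible coherent memory. The same idiom for an arbitrary
// preferred/required requirement pair would look like this (illustrative sketch;
// "createInfo", "preferredReqs" and "requiredReqs" are hypothetical):
//
//   BufferWithMemory* buf = nullptr;
//   try { buf = new BufferWithMemory(vk, device, allocator, createInfo, preferredReqs); }
//   catch (const tcu::NotSupportedError&) {} // fall through to the required set
//   if (buf == nullptr)
//       buf = new BufferWithMemory(vk, device, allocator, createInfo, requiredReqs);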

void updateSingleInstance (const DeviceInterface& vk,
                           const VkDevice device,
                           const BottomLevelAccelerationStructure& bottomLevelAccelerationStructure,
                           const InstanceData& instanceData,
                           deUint8* bufferLocation,
                           VkAccelerationStructureBuildTypeKHR buildType,
                           bool inactiveInstances)
{
    const VkAccelerationStructureKHR accelerationStructureKHR = *bottomLevelAccelerationStructure.getPtr();

    // This part needs to be fixed once a new version of VkAccelerationStructureInstanceKHR is added to vkStructTypes.inl.
    VkDeviceAddress accelerationStructureAddress = 0;
    if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        VkAccelerationStructureDeviceAddressInfoKHR asDeviceAddressInfo =
        {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR, // VkStructureType            sType;
            DE_NULL,                                                          // const void*                pNext;
            accelerationStructureKHR                                          // VkAccelerationStructureKHR accelerationStructure;
        };
        accelerationStructureAddress = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
    }

    deUint64 structureReference;
    if (inactiveInstances)
    {
        // Instances are marked inactive by giving them a null (zero) acceleration structure reference.
        structureReference = 0ull;
    }
    else
    {
        structureReference = (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
                           ? deUint64(accelerationStructureAddress)
                           : deUint64(accelerationStructureKHR.getInternal());
    }

    VkAccelerationStructureInstanceKHR accelerationStructureInstanceKHR = makeVkAccelerationStructureInstanceKHR
    (
        instanceData.matrix,                                 // VkTransformMatrixKHR       transform;
        instanceData.instanceCustomIndex,                    // deUint32                   instanceCustomIndex:24;
        instanceData.mask,                                   // deUint32                   mask:8;
        instanceData.instanceShaderBindingTableRecordOffset, // deUint32                   instanceShaderBindingTableRecordOffset:24;
        instanceData.flags,                                  // VkGeometryInstanceFlagsKHR flags:8;
        structureReference                                   // deUint64                   accelerationStructureReference;
    );

    deMemcpy(bufferLocation, &accelerationStructureInstanceKHR, sizeof(VkAccelerationStructureInstanceKHR));
}

void updateInstanceBuffer (const DeviceInterface& vk,
                           const VkDevice device,
                           const std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>& bottomLevelInstances,
                           const std::vector<InstanceData>& instanceData,
                           const BufferWithMemory* instanceBuffer,
                           VkAccelerationStructureBuildTypeKHR buildType,
                           bool inactiveInstances)
{
    DE_ASSERT(bottomLevelInstances.size() != 0);
    DE_ASSERT(bottomLevelInstances.size() == instanceData.size());

    auto&        instancesAlloc = instanceBuffer->getAllocation();
    auto         bufferStart    = reinterpret_cast<deUint8*>(instancesAlloc.getHostPtr());
    VkDeviceSize bufferOffset   = 0ull;

    for (size_t instanceNdx = 0; instanceNdx < bottomLevelInstances.size(); ++instanceNdx)
    {
        const auto& blas = *bottomLevelInstances[instanceNdx];
        updateSingleInstance(vk, device, blas, instanceData[instanceNdx], bufferStart + bufferOffset, buildType, inactiveInstances);
        bufferOffset += sizeof(VkAccelerationStructureInstanceKHR);
    }

    flushMappedMemoryRange(vk, device, instancesAlloc.getMemory(), instancesAlloc.getOffset(), VK_WHOLE_SIZE);
}

class TopLevelAccelerationStructureKHR : public TopLevelAccelerationStructure
{
public:
    static deUint32 getRequiredAllocationCount (void);

    TopLevelAccelerationStructureKHR ();
    TopLevelAccelerationStructureKHR (const TopLevelAccelerationStructureKHR& other) = delete;
    virtual ~TopLevelAccelerationStructureKHR ();

    void setBuildType (const VkAccelerationStructureBuildTypeKHR buildType) override;
    void setCreateFlags (const VkAccelerationStructureCreateFlagsKHR createFlags) override;
    void setCreateGeneric (bool createGeneric) override;
    void setCreationBufferUnbounded (bool creationBufferUnbounded) override;
    void setBuildFlags (const VkBuildAccelerationStructureFlagsKHR buildFlags) override;
    void setBuildWithoutPrimitives (bool buildWithoutPrimitives) override;
    void setInactiveInstances (bool inactiveInstances) override;
    void setDeferredOperation (const bool deferredOperation,
                               const deUint32 workerThreadCount) override;
    void setUseArrayOfPointers (const bool useArrayOfPointers) override;
    void setIndirectBuildParameters (const VkBuffer indirectBuffer,
                                     const VkDeviceSize indirectBufferOffset,
                                     const deUint32 indirectBufferStride) override;
    void setUsePPGeometries (const bool usePPGeometries) override;
    void setTryCachedMemory (const bool tryCachedMemory) override;
    VkBuildAccelerationStructureFlagsKHR getBuildFlags () const override;

    void getCreationSizes (const DeviceInterface& vk,
                           const VkDevice device,
                           const VkDeviceSize structureSize,
                           CreationSizes& sizes) override;
    void create (const DeviceInterface& vk,
                 const VkDevice device,
                 Allocator& allocator,
                 VkDeviceSize structureSize,
                 VkDeviceAddress deviceAddress = 0u,
                 const void* pNext = DE_NULL,
                 const MemoryRequirement& addMemoryRequirement = MemoryRequirement::Any,
                 const VkBuffer creationBuffer = VK_NULL_HANDLE,
                 const VkDeviceSize creationBufferSize = 0u) override;
    void build (const DeviceInterface& vk,
                const VkDevice device,
                const VkCommandBuffer cmdBuffer,
                TopLevelAccelerationStructure* srcAccelerationStructure = DE_NULL) override;
    void copyFrom (const DeviceInterface& vk,
                   const VkDevice device,
                   const VkCommandBuffer cmdBuffer,
                   TopLevelAccelerationStructure* accelerationStructure,
                   bool compactCopy) override;
    void serialize (const DeviceInterface& vk,
                    const VkDevice device,
                    const VkCommandBuffer cmdBuffer,
                    SerialStorage* storage) override;
    void deserialize (const DeviceInterface& vk,
                      const VkDevice device,
                      const VkCommandBuffer cmdBuffer,
                      SerialStorage* storage) override;

    std::vector<VkDeviceSize> getSerializingSizes (const DeviceInterface& vk,
                                                   const VkDevice device,
                                                   const VkQueue queue,
                                                   const deUint32 queueFamilyIndex) override;

    std::vector<deUint64> getSerializingAddresses (const DeviceInterface& vk,
                                                   const VkDevice device) const override;

    const VkAccelerationStructureKHR* getPtr (void) const override;

    void updateInstanceMatrix (const DeviceInterface& vk,
                               const VkDevice device,
                               size_t instanceIndex,
                               const VkTransformMatrixKHR& matrix) override;

protected:
    VkAccelerationStructureBuildTypeKHR   m_buildType;
    VkAccelerationStructureCreateFlagsKHR m_createFlags;
    bool                                  m_createGeneric;
    bool                                  m_creationBufferUnbounded;
    VkBuildAccelerationStructureFlagsKHR  m_buildFlags;
    bool                                  m_buildWithoutPrimitives;
    bool                                  m_inactiveInstances;
    bool                                  m_deferredOperation;
    deUint32                              m_workerThreadCount;
    bool                                  m_useArrayOfPointers;
    de::MovePtr<BufferWithMemory>         m_accelerationStructureBuffer;
    de::MovePtr<BufferWithMemory>         m_instanceBuffer;
    de::MovePtr<BufferWithMemory>         m_instanceAddressBuffer;
    de::MovePtr<BufferWithMemory>         m_deviceScratchBuffer;
    std::vector<deUint8>                  m_hostScratchBuffer;
    Move<VkAccelerationStructureKHR>      m_accelerationStructureKHR;
    VkBuffer                              m_indirectBuffer;
    VkDeviceSize                          m_indirectBufferOffset;
    deUint32                              m_indirectBufferStride;
    bool                                  m_usePPGeometries;
    bool                                  m_tryCachedMemory;

    void prepareInstances (const DeviceInterface& vk,
                           const VkDevice device,
                           VkAccelerationStructureGeometryKHR& accelerationStructureGeometryKHR,
                           std::vector<deUint32>& maxPrimitiveCounts);

    void serializeBottoms (const DeviceInterface& vk,
                           const VkDevice device,
                           const VkCommandBuffer cmdBuffer,
                           SerialStorage* storage,
                           VkDeferredOperationKHR deferredOperation);

    void createAndDeserializeBottoms (const DeviceInterface& vk,
                                      const VkDevice device,
                                      const VkCommandBuffer cmdBuffer,
                                      Allocator& allocator,
                                      SerialStorage* storage) override;
};

deUint32 TopLevelAccelerationStructureKHR::getRequiredAllocationCount (void)
{
    /*
        de::MovePtr<BufferWithMemory> m_instanceBuffer;
        de::MovePtr<Allocation>       m_accelerationStructureAlloc;
        de::MovePtr<BufferWithMemory> m_deviceScratchBuffer;
    */
    return 3u;
}

TopLevelAccelerationStructureKHR::TopLevelAccelerationStructureKHR ()
    : TopLevelAccelerationStructure ()
    , m_buildType                   (VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    , m_createFlags                 (0u)
    , m_createGeneric               (false)
    , m_creationBufferUnbounded     (false)
    , m_buildFlags                  (0u)
    , m_buildWithoutPrimitives      (false)
    , m_inactiveInstances           (false)
    , m_deferredOperation           (false)
    , m_workerThreadCount           (0)
    , m_useArrayOfPointers          (false)
    , m_accelerationStructureBuffer (DE_NULL)
    , m_instanceBuffer              (DE_NULL)
    , m_instanceAddressBuffer       (DE_NULL)
    , m_deviceScratchBuffer         (DE_NULL)
    , m_accelerationStructureKHR    ()
    , m_indirectBuffer              (DE_NULL)
    , m_indirectBufferOffset        (0)
    , m_indirectBufferStride        (0)
    , m_usePPGeometries             (false)
    , m_tryCachedMemory             (true)
{
}

TopLevelAccelerationStructureKHR::~TopLevelAccelerationStructureKHR ()
{
}

void TopLevelAccelerationStructureKHR::setBuildType (const VkAccelerationStructureBuildTypeKHR buildType)
{
    m_buildType = buildType;
}

void TopLevelAccelerationStructureKHR::setCreateFlags (const VkAccelerationStructureCreateFlagsKHR createFlags)
{
    m_createFlags = createFlags;
}

void TopLevelAccelerationStructureKHR::setCreateGeneric (bool createGeneric)
{
    m_createGeneric = createGeneric;
}

void TopLevelAccelerationStructureKHR::setCreationBufferUnbounded (bool creationBufferUnbounded)
{
    m_creationBufferUnbounded = creationBufferUnbounded;
}

void TopLevelAccelerationStructureKHR::setInactiveInstances (bool inactiveInstances)
{
    m_inactiveInstances = inactiveInstances;
}

void TopLevelAccelerationStructureKHR::setBuildFlags (const VkBuildAccelerationStructureFlagsKHR buildFlags)
{
    m_buildFlags = buildFlags;
}

void TopLevelAccelerationStructureKHR::setBuildWithoutPrimitives (bool buildWithoutPrimitives)
{
    m_buildWithoutPrimitives = buildWithoutPrimitives;
}

void TopLevelAccelerationStructureKHR::setDeferredOperation (const bool deferredOperation,
                                                             const deUint32 workerThreadCount)
{
    m_deferredOperation = deferredOperation;
    m_workerThreadCount = workerThreadCount;
}

void TopLevelAccelerationStructureKHR::setUseArrayOfPointers (const bool useArrayOfPointers)
{
    m_useArrayOfPointers = useArrayOfPointers;
}

void TopLevelAccelerationStructureKHR::setUsePPGeometries (const bool usePPGeometries)
{
    m_usePPGeometries = usePPGeometries;
}

void TopLevelAccelerationStructureKHR::setTryCachedMemory (const bool tryCachedMemory)
{
    m_tryCachedMemory = tryCachedMemory;
}

void TopLevelAccelerationStructureKHR::setIndirectBuildParameters (const VkBuffer indirectBuffer,
                                                                   const VkDeviceSize indirectBufferOffset,
                                                                   const deUint32 indirectBufferStride)
{
    m_indirectBuffer       = indirectBuffer;
    m_indirectBufferOffset = indirectBufferOffset;
    m_indirectBufferStride = indirectBufferStride;
}

VkBuildAccelerationStructureFlagsKHR TopLevelAccelerationStructureKHR::getBuildFlags () const
{
    return m_buildFlags;
}

VkDeviceSize TopLevelAccelerationStructure::CreationSizes::sum () const
{
    return structure + updateScratch + buildScratch + instancePointers + instancesBuffer;
}

void TopLevelAccelerationStructureKHR::getCreationSizes (const DeviceInterface& vk,
                                                         const VkDevice device,
                                                         const VkDeviceSize structureSize,
                                                         CreationSizes& sizes)
{
    // AS may be built from geometries using vkCmdBuildAccelerationStructuresKHR / vkBuildAccelerationStructuresKHR,
    // or it may be copied/compacted/deserialized from another AS (in which case the AS needs no geometries, but it must know its size before creation).
    DE_ASSERT(!m_bottomLevelInstances.empty() != !(structureSize == 0)); // logical xor

    if (structureSize == 0)
    {
        VkAccelerationStructureGeometryKHR accelerationStructureGeometryKHR;
        const auto                         accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
        std::vector<deUint32>              maxPrimitiveCounts;
        prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);

        VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR =
        {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,     // VkStructureType                                   sType;
            DE_NULL,                                                              // const void*                                       pNext;
            VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,                         // VkAccelerationStructureTypeKHR                    type;
            m_buildFlags,                                                         // VkBuildAccelerationStructureFlagsKHR              flags;
            VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,                       // VkBuildAccelerationStructureModeKHR               mode;
            DE_NULL,                                                              // VkAccelerationStructureKHR                        srcAccelerationStructure;
            DE_NULL,                                                              // VkAccelerationStructureKHR                        dstAccelerationStructure;
            1u,                                                                   // deUint32                                          geometryCount;
            (m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR),    // const VkAccelerationStructureGeometryKHR*         pGeometries;
            (m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr), // const VkAccelerationStructureGeometryKHR* const*  ppGeometries;
            makeDeviceOrHostAddressKHR(DE_NULL)                                   // VkDeviceOrHostAddressKHR                          scratchData;
        };

        VkAccelerationStructureBuildSizesInfoKHR sizeInfo =
        {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, // VkStructureType sType;
            DE_NULL,                                                       // const void*     pNext;
            0,                                                             // VkDeviceSize    accelerationStructureSize;
            0,                                                             // VkDeviceSize    updateScratchSize;
            0                                                              // VkDeviceSize    buildScratchSize;
        };

        vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);

        sizes.structure     = sizeInfo.accelerationStructureSize;
        sizes.updateScratch = sizeInfo.updateScratchSize;
        sizes.buildScratch  = sizeInfo.buildScratchSize;
    }
    else
    {
        sizes.structure     = structureSize;
        sizes.updateScratch = 0u;
        sizes.buildScratch  = 0u;
    }

    sizes.instancePointers = 0u;
    if (m_useArrayOfPointers)
    {
        const size_t pointerSize = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) ? sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress) : sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
        sizes.instancePointers = static_cast<VkDeviceSize>(m_bottomLevelInstances.size() * pointerSize);
    }

    sizes.instancesBuffer = m_bottomLevelInstances.empty() ? 0u : m_bottomLevelInstances.size() * sizeof(VkAccelerationStructureInstanceKHR);
}
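
// Illustrative use of getCreationSizes(): a caller that wants to check the memory
// footprint before committing to a build might do the following (hedged sketch; "tlas"
// is assumed to have its instances registered already, and "memoryBudget" is a
// hypothetical limit):
//
//   TopLevelAccelerationStructure::CreationSizes sizes;
//   tlas->getCreationSizes(vk, device, 0ull /* deduce from instances */, sizes);
//   if (sizes.sum() > memoryBudget)
//       TCU_THROW(NotSupportedError, "Acceleration structure would exceed memory budget");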

void TopLevelAccelerationStructureKHR::create (const DeviceInterface& vk,
                                               const VkDevice device,
                                               Allocator& allocator,
                                               VkDeviceSize structureSize,
                                               VkDeviceAddress deviceAddress,
                                               const void* pNext,
                                               const MemoryRequirement& addMemoryRequirement,
                                               const VkBuffer creationBuffer,
                                               const VkDeviceSize creationBufferSize)
{
    // AS may be built from geometries using vkCmdBuildAccelerationStructuresKHR / vkBuildAccelerationStructuresKHR,
    // or it may be copied/compacted/deserialized from another AS (in which case the AS needs no geometries, but it must know its size before creation).
    DE_ASSERT(!m_bottomLevelInstances.empty() != !(structureSize == 0)); // logical xor

    if (structureSize == 0)
    {
        VkAccelerationStructureGeometryKHR accelerationStructureGeometryKHR;
        const auto                         accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
        std::vector<deUint32>              maxPrimitiveCounts;
        prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);

        VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR =
        {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,     // VkStructureType                                   sType;
            DE_NULL,                                                              // const void*                                       pNext;
            VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,                         // VkAccelerationStructureTypeKHR                    type;
            m_buildFlags,                                                         // VkBuildAccelerationStructureFlagsKHR              flags;
            VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,                       // VkBuildAccelerationStructureModeKHR               mode;
            DE_NULL,                                                              // VkAccelerationStructureKHR                        srcAccelerationStructure;
            DE_NULL,                                                              // VkAccelerationStructureKHR                        dstAccelerationStructure;
            1u,                                                                   // deUint32                                          geometryCount;
            (m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR),    // const VkAccelerationStructureGeometryKHR*         pGeometries;
            (m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr), // const VkAccelerationStructureGeometryKHR* const*  ppGeometries;
            makeDeviceOrHostAddressKHR(DE_NULL)                                   // VkDeviceOrHostAddressKHR                          scratchData;
        };

        VkAccelerationStructureBuildSizesInfoKHR sizeInfo =
        {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, // VkStructureType sType;
            DE_NULL,                                                       // const void*     pNext;
            0,                                                             // VkDeviceSize    accelerationStructureSize;
            0,                                                             // VkDeviceSize    updateScratchSize;
            0                                                              // VkDeviceSize    buildScratchSize;
        };

        vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);

        m_structureSize     = sizeInfo.accelerationStructureSize;
        m_updateScratchSize = sizeInfo.updateScratchSize;
        m_buildScratchSize  = sizeInfo.buildScratchSize;
    }
    else
    {
        m_structureSize     = structureSize;
        m_updateScratchSize = 0u;
        m_buildScratchSize  = 0u;
    }

    const bool externalCreationBuffer = (creationBuffer != VK_NULL_HANDLE);

    if (externalCreationBuffer)
    {
        DE_UNREF(creationBufferSize); // For release builds.
        DE_ASSERT(creationBufferSize >= m_structureSize);
    }

    if (!externalCreationBuffer)
    {
        const VkBufferCreateInfo bufferCreateInfo  = makeBufferCreateInfo(m_structureSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
        const MemoryRequirement  memoryRequirement = addMemoryRequirement | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress;
        const bool               bindMemOnCreation = (!m_creationBufferUnbounded);

        try
        {
            m_accelerationStructureBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, (MemoryRequirement::Cached | memoryRequirement), bindMemOnCreation));
        }
        catch (const tcu::NotSupportedError&)
        {
            // Retry without the Cached flag.
            m_accelerationStructureBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, memoryRequirement, bindMemOnCreation));
        }
    }

    const auto createInfoBuffer = (externalCreationBuffer ? creationBuffer : m_accelerationStructureBuffer->get());
    {
        const VkAccelerationStructureTypeKHR structureType = (m_createGeneric
                                                              ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
                                                              : VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR);
        const VkAccelerationStructureCreateInfoKHR accelerationStructureCreateInfoKHR =
        {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR, // VkStructureType                       sType;
            pNext,                                                    // const void*                           pNext;
            m_createFlags,                                            // VkAccelerationStructureCreateFlagsKHR createFlags;
            createInfoBuffer,                                         // VkBuffer                              buffer;
            0u,                                                       // VkDeviceSize                          offset;
            m_structureSize,                                          // VkDeviceSize                          size;
            structureType,                                            // VkAccelerationStructureTypeKHR        type;
            deviceAddress                                             // VkDeviceAddress                       deviceAddress;
        };

        m_accelerationStructureKHR = createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);

        // Make sure buffer memory is always bound after creation.
        if (!externalCreationBuffer)
            m_accelerationStructureBuffer->bindMemory();
    }

    if (m_buildScratchSize > 0u)
    {
        if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
        {
            const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(m_buildScratchSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
            m_deviceScratchBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
        }
        else
        {
            m_hostScratchBuffer.resize(static_cast<size_t>(m_buildScratchSize));
        }
    }

    if (m_useArrayOfPointers)
    {
        const size_t             pointerSize      = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) ? sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress) : sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
        const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(static_cast<VkDeviceSize>(m_bottomLevelInstances.size() * pointerSize), VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
        m_instanceAddressBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
    }

    if (!m_bottomLevelInstances.empty())
        m_instanceBuffer = de::MovePtr<BufferWithMemory>(createInstanceBuffer(vk, device, allocator, m_bottomLevelInstances, m_instanceData, m_tryCachedMemory));
}
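
// Note on m_creationBufferUnbounded: when it is set, bindMemOnCreation above is false,
// so the backing buffer is allocated without bound memory, the acceleration structure
// is created against the still-unbound buffer, and bindMemory() is only called after
// vkCreateAccelerationStructureKHR returns. The effective call order is (sketch; the
// raw Vulkan calls are assumed to be what BufferWithMemory issues internally):
//
//   buffer = vkCreateBuffer(...);                    // memory allocated but not bound
//   as     = vkCreateAccelerationStructureKHR(...);  // buffer still unbound here
//   vkBindBufferMemory(...);                         // bound before the first build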

void TopLevelAccelerationStructureKHR::updateInstanceMatrix (const DeviceInterface& vk, const VkDevice device, size_t instanceIndex, const VkTransformMatrixKHR& matrix)
{
    DE_ASSERT(instanceIndex < m_bottomLevelInstances.size());
    DE_ASSERT(instanceIndex < m_instanceData.size());

    const auto&  blas           = *m_bottomLevelInstances[instanceIndex];
    auto&        instanceData   = m_instanceData[instanceIndex];
    auto&        instancesAlloc = m_instanceBuffer->getAllocation();
    auto         bufferStart    = reinterpret_cast<deUint8*>(instancesAlloc.getHostPtr());
    VkDeviceSize bufferOffset   = sizeof(VkAccelerationStructureInstanceKHR) * instanceIndex;

    instanceData.matrix = matrix;
    updateSingleInstance(vk, device, blas, instanceData, bufferStart + bufferOffset, m_buildType, m_inactiveInstances);
    flushMappedMemoryRange(vk, device, instancesAlloc.getMemory(), instancesAlloc.getOffset(), VK_WHOLE_SIZE);
}
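
// Illustrative follow-up: after patching a transform with updateInstanceMatrix(), a
// caller would typically re-record the build in update mode by passing the structure as
// its own update source (hedged sketch; it assumes the original build used
// VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR and that an in-place update is
// acceptable for the test):
//
//   tlas->updateInstanceMatrix(vk, device, 0 /* instance index */, newMatrix);
//   tlas->build(vk, device, *cmdBuffer, tlas.get()); // takes the MODE_UPDATE_KHR path below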

void TopLevelAccelerationStructureKHR::build (const DeviceInterface& vk,
                                              const VkDevice device,
                                              const VkCommandBuffer cmdBuffer,
                                              TopLevelAccelerationStructure* srcAccelerationStructure)
{
    DE_ASSERT(!m_bottomLevelInstances.empty());
    DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
    DE_ASSERT(m_buildScratchSize != 0);

    updateInstanceBuffer(vk, device, m_bottomLevelInstances, m_instanceData, m_instanceBuffer.get(), m_buildType, m_inactiveInstances);

    VkAccelerationStructureGeometryKHR accelerationStructureGeometryKHR;
    const auto                         accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
    std::vector<deUint32>              maxPrimitiveCounts;
    prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);

    VkDeviceOrHostAddressKHR scratchData = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
                                         ? makeDeviceOrHostAddressKHR(vk, device, m_deviceScratchBuffer->get(), 0)
                                         : makeDeviceOrHostAddressKHR(m_hostScratchBuffer.data());

    VkAccelerationStructureKHR          srcStructure = (srcAccelerationStructure != DE_NULL) ? *(srcAccelerationStructure->getPtr()) : DE_NULL;
    VkBuildAccelerationStructureModeKHR mode         = (srcAccelerationStructure != DE_NULL) ? VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR : VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;

    VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR =
    {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,     // VkStructureType                                   sType;
        DE_NULL,                                                              // const void*                                       pNext;
        VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,                         // VkAccelerationStructureTypeKHR                    type;
        m_buildFlags,                                                         // VkBuildAccelerationStructureFlagsKHR              flags;
        mode,                                                                 // VkBuildAccelerationStructureModeKHR               mode;
        srcStructure,                                                         // VkAccelerationStructureKHR                        srcAccelerationStructure;
        m_accelerationStructureKHR.get(),                                     // VkAccelerationStructureKHR                        dstAccelerationStructure;
        1u,                                                                   // deUint32                                          geometryCount;
        (m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR),    // const VkAccelerationStructureGeometryKHR*         pGeometries;
        (m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr), // const VkAccelerationStructureGeometryKHR* const*  ppGeometries;
        scratchData                                                           // VkDeviceOrHostAddressKHR                          scratchData;
    };

    const deUint32 primitiveCount = (m_buildWithoutPrimitives ? 0u : static_cast<deUint32>(m_bottomLevelInstances.size()));

    VkAccelerationStructureBuildRangeInfoKHR accelerationStructureBuildRangeInfoKHR =
    {
        primitiveCount, // deUint32 primitiveCount;
        0,              // deUint32 primitiveOffset;
        0,              // deUint32 firstVertex;
        0               // deUint32 transformOffset;
    };
    VkAccelerationStructureBuildRangeInfoKHR* accelerationStructureBuildRangeInfoKHRPtr = &accelerationStructureBuildRangeInfoKHR;

    if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        if (m_indirectBuffer == DE_NULL)
            vk.cmdBuildAccelerationStructuresKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
        else
        {
            VkDeviceAddress indirectDeviceAddress = getBufferDeviceAddress(vk, device, m_indirectBuffer, m_indirectBufferOffset);
            deUint32*       pMaxPrimitiveCounts   = maxPrimitiveCounts.data();
            vk.cmdBuildAccelerationStructuresIndirectKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, &indirectDeviceAddress, &m_indirectBufferStride, &pMaxPrimitiveCounts);
        }
    }
    else if (!m_deferredOperation)
    {
        VK_CHECK(vk.buildAccelerationStructuresKHR(device, DE_NULL, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr));
    }
    else
    {
        const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
        const auto deferredOperation    = deferredOperationPtr.get();

        VkResult result = vk.buildAccelerationStructuresKHR(device, deferredOperation, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);

        DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);

        finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);

        accelerationStructureBuildGeometryInfoKHR.pNext = DE_NULL;
    }

    if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        const VkAccessFlags   accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
        const VkMemoryBarrier memBarrier  = makeMemoryBarrier(accessMasks, accessMasks);

        cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
    }
}
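
// The deferred path above follows the usual VK_KHR_deferred_host_operations pattern:
// the call may return VK_OPERATION_DEFERRED_KHR, VK_OPERATION_NOT_DEFERRED_KHR or plain
// VK_SUCCESS, and finishDeferredOperation() then drives m_workerThreadCount threads
// until the operation reports completion. The same create/call/assert/finish sequence
// is reused by copyFrom(), serialize() and deserialize() below.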

void TopLevelAccelerationStructureKHR::copyFrom (const DeviceInterface& vk,
                                                 const VkDevice device,
                                                 const VkCommandBuffer cmdBuffer,
                                                 TopLevelAccelerationStructure* accelerationStructure,
                                                 bool compactCopy)
{
    DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
    DE_ASSERT(accelerationStructure != DE_NULL);

    VkCopyAccelerationStructureInfoKHR copyAccelerationStructureInfo =
    {
        VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR,                                                       // VkStructureType                    sType;
        DE_NULL,                                                                                                      // const void*                        pNext;
        *(accelerationStructure->getPtr()),                                                                           // VkAccelerationStructureKHR         src;
        *(getPtr()),                                                                                                  // VkAccelerationStructureKHR         dst;
        compactCopy ? VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR : VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR // VkCopyAccelerationStructureModeKHR mode;
    };

    if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        vk.cmdCopyAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
    }
    else if (!m_deferredOperation)
    {
        VK_CHECK(vk.copyAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
    }
    else
    {
        const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
        const auto deferredOperation    = deferredOperationPtr.get();

        VkResult result = vk.copyAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);

        DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);

        finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
    }

    if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        const VkAccessFlags   accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
        const VkMemoryBarrier memBarrier  = makeMemoryBarrier(accessMasks, accessMasks);

        cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
    }
}
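
// Illustrative compaction flow built on copyFrom() (hedged sketch; it assumes the
// source was built with VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR and
// that the query result has been retrieved where the device path requires it):
//
//   std::vector<VkDeviceSize> compactSizes;
//   queryAccelerationStructureSize(vk, device, *cmdBuffer, { *src->getPtr() }, buildType,
//                                  *queryPool, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR,
//                                  0u, compactSizes);
//   // ... on the device-build path: submit and read the query pool results back ...
//   dst->createAndCopyFrom(vk, device, *cmdBuffer, allocator, src, compactSizes[0]);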

void TopLevelAccelerationStructureKHR::serialize (const DeviceInterface& vk,
                                                  const VkDevice device,
                                                  const VkCommandBuffer cmdBuffer,
                                                  SerialStorage* storage)
{
    DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
    DE_ASSERT(storage != DE_NULL);

    const VkCopyAccelerationStructureToMemoryInfoKHR copyAccelerationStructureInfo =
    {
        VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_INFO_KHR, // VkStructureType                    sType;
        DE_NULL,                                                          // const void*                        pNext;
        *(getPtr()),                                                      // VkAccelerationStructureKHR         src;
        storage->getAddress(vk, device, m_buildType),                     // VkDeviceOrHostAddressKHR           dst;
        VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR                 // VkCopyAccelerationStructureModeKHR mode;
    };

    if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        vk.cmdCopyAccelerationStructureToMemoryKHR(cmdBuffer, &copyAccelerationStructureInfo);
        if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, DE_NULL);
    }
    else if (!m_deferredOperation)
    {
        VK_CHECK(vk.copyAccelerationStructureToMemoryKHR(device, DE_NULL, &copyAccelerationStructureInfo));
        if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, DE_NULL);
    }
    else
    {
        const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
        const auto deferredOperation    = deferredOperationPtr.get();

        const VkResult result = vk.copyAccelerationStructureToMemoryKHR(device, deferredOperation, &copyAccelerationStructureInfo);

        DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
        if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, deferredOperation);

        finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
    }
}
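
// Illustrative serialization round trip (hedged sketch; the SerialStorage constructor
// arguments shown here are indicative only):
//
//   const std::vector<VkDeviceSize> sizes = tlas->getSerializingSizes(vk, device, queue, queueFamilyIndex);
//   SerialStorage storage (vk, device, allocator, buildType, sizes[0]);
//   tlas->serialize(vk, device, *cmdBuffer, &storage);
//   // ... later, on a device with compatible acceleration structure UUIDs ...
//   de::MovePtr<TopLevelAccelerationStructure> copy = makeTopLevelAccelerationStructure();
//   copy->createAndDeserializeFrom(vk, device, *cmdBuffer, allocator, &storage);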

void TopLevelAccelerationStructureKHR::deserialize (const DeviceInterface& vk,
                                                    const VkDevice device,
                                                    const VkCommandBuffer cmdBuffer,
                                                    SerialStorage* storage)
{
    DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
    DE_ASSERT(storage != DE_NULL);

    const VkCopyMemoryToAccelerationStructureInfoKHR copyAccelerationStructureInfo =
    {
        VK_STRUCTURE_TYPE_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_INFO_KHR, // VkStructureType                    sType;
        DE_NULL,                                                          // const void*                        pNext;
        storage->getAddressConst(vk, device, m_buildType),                // VkDeviceOrHostAddressConstKHR      src;
        *(getPtr()),                                                      // VkAccelerationStructureKHR         dst;
        VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR               // VkCopyAccelerationStructureModeKHR mode;
    };

    if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        vk.cmdCopyMemoryToAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
    }
    else if (!m_deferredOperation)
    {
        VK_CHECK(vk.copyMemoryToAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
    }
    else
    {
        const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
        const auto deferredOperation    = deferredOperationPtr.get();

        const VkResult result = vk.copyMemoryToAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);

        DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);

        finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
    }

    if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        const VkAccessFlags   accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
        const VkMemoryBarrier memBarrier  = makeMemoryBarrier(accessMasks, accessMasks);

        cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
    }
}

void TopLevelAccelerationStructureKHR::serializeBottoms (const DeviceInterface& vk,
                                                         const VkDevice device,
                                                         const VkCommandBuffer cmdBuffer,
                                                         SerialStorage* storage,
                                                         VkDeferredOperationKHR deferredOperation)
{
    DE_UNREF(deferredOperation);
    DE_ASSERT(storage->hasDeepFormat());

    const std::vector<deUint64>& addresses = storage->getSerialInfo().addresses();
    const std::size_t            cbottoms  = m_bottomLevelInstances.size();

    deUint32              storageIndex = 0;
    std::vector<deUint64> matches;

    for (std::size_t i = 0; i < cbottoms; ++i)
    {
        const deUint64& lookAddr = addresses[i+1];
        auto            end      = matches.end();
        auto            match    = std::find_if(matches.begin(), end, [&](const deUint64& item){ return item == lookAddr; });
        if (match == end)
        {
            matches.emplace_back(lookAddr);
            m_bottomLevelInstances[i].get()->serialize(vk, device, cmdBuffer, storage->getBottomStorage(storageIndex).get());
            storageIndex += 1;
        }
    }
}

void TopLevelAccelerationStructureKHR::createAndDeserializeBottoms (const DeviceInterface& vk,
                                                                    const VkDevice device,
                                                                    const VkCommandBuffer cmdBuffer,
                                                                    Allocator& allocator,
                                                                    SerialStorage* storage)
{
    DE_ASSERT(storage->hasDeepFormat());
    DE_ASSERT(m_bottomLevelInstances.size() == 0);

    const std::vector<deUint64>&                  addresses    = storage->getSerialInfo().addresses();
    const std::size_t                             cbottoms     = addresses.size() - 1;
    deUint32                                      storageIndex = 0;
    std::vector<std::pair<deUint64, std::size_t>> matches;

    for (std::size_t i = 0; i < cbottoms; ++i)
    {
        const deUint64& lookAddr = addresses[i+1];
        auto            end      = matches.end();
        auto            match    = std::find_if(matches.begin(), end, [&](const std::pair<deUint64, std::size_t>& item){ return item.first == lookAddr; });
        if (match != end)
        {
            m_bottomLevelInstances.emplace_back(m_bottomLevelInstances[match->second]);
        }
        else
        {
            de::MovePtr<BottomLevelAccelerationStructure> blas = makeBottomLevelAccelerationStructure();
            blas->createAndDeserializeFrom(vk, device, cmdBuffer, allocator, storage->getBottomStorage(storageIndex).get());
            m_bottomLevelInstances.emplace_back(de::SharedPtr<BottomLevelAccelerationStructure>(blas.release()));
            matches.emplace_back(lookAddr, i);
            storageIndex += 1;
        }
    }

    std::vector<deUint64> newAddresses = getSerializingAddresses(vk, device);
    DE_ASSERT(addresses.size() == newAddresses.size());

    SerialStorage::AccelerationStructureHeader* header = storage->getASHeader();
    DE_ASSERT(cbottoms == header->handleCount);

    // Finally, update the bottom-level AS addresses before the top-level AS deserialization.
    for (std::size_t i = 0; i < cbottoms; ++i)
    {
        header->handleArray[i] = newAddresses[i+1];
    }
}
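
// Note on the deep ("bottom-level aware") path above: each unique BLAS is deserialized
// exactly once - repeated serialized addresses are matched back to the structure that
// was already re-created - and the header's handle array is then rewritten with the new
// addresses so that the subsequent top-level deserialize() resolves its instances
// against the freshly created bottom-level structures.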

std::vector<VkDeviceSize> TopLevelAccelerationStructureKHR::getSerializingSizes (const DeviceInterface& vk,
                                                                                 const VkDevice device,
                                                                                 const VkQueue queue,
                                                                                 const deUint32 queueFamilyIndex)
{
    const deUint32                          queryCount(deUint32(m_bottomLevelInstances.size()) + 1);
    std::vector<VkAccelerationStructureKHR> handles(queryCount);
    std::vector<VkDeviceSize>               sizes(queryCount);

    handles[0] = m_accelerationStructureKHR.get();

    for (deUint32 h = 1; h < queryCount; ++h)
        handles[h] = *m_bottomLevelInstances[h-1].get()->getPtr();

    if (VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR == m_buildType)
        queryAccelerationStructureSize(vk, device, DE_NULL, handles, m_buildType, DE_NULL, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, sizes);
    else
    {
        const Move<VkCommandPool>   cmdPool   = createCommandPool(vk, device, 0, queueFamilyIndex);
        const Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
        const Move<VkQueryPool>     queryPool = makeQueryPool(vk, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, queryCount);

        beginCommandBuffer(vk, *cmdBuffer);
        queryAccelerationStructureSize(vk, device, *cmdBuffer, handles, m_buildType, *queryPool, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, sizes);
        endCommandBuffer(vk, *cmdBuffer);
        submitCommandsAndWait(vk, device, queue, cmdBuffer.get());

        VK_CHECK(vk.getQueryPoolResults(device, *queryPool, 0u, queryCount, queryCount * sizeof(VkDeviceSize), sizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
    }

    return sizes;
}

std::vector<deUint64> TopLevelAccelerationStructureKHR::getSerializingAddresses (const DeviceInterface& vk, const VkDevice device) const
{
    std::vector<deUint64> result(m_bottomLevelInstances.size() + 1);

    VkAccelerationStructureDeviceAddressInfoKHR asDeviceAddressInfo =
    {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR, // VkStructureType            sType;
        DE_NULL,                                                          // const void*                pNext;
        DE_NULL                                                           // VkAccelerationStructureKHR accelerationStructure;
    };

    if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        asDeviceAddressInfo.accelerationStructure = m_accelerationStructureKHR.get();
        result[0] = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
    }
    else
    {
        result[0] = deUint64(getPtr()->getInternal());
    }

    for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
    {
        const BottomLevelAccelerationStructure& bottomLevelAccelerationStructure = *m_bottomLevelInstances[instanceNdx];
        const VkAccelerationStructureKHR        accelerationStructureKHR         = *bottomLevelAccelerationStructure.getPtr();

        if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
        {
            asDeviceAddressInfo.accelerationStructure = accelerationStructureKHR;
            result[instanceNdx+1] = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
        }
        else
        {
            result[instanceNdx+1] = deUint64(accelerationStructureKHR.getInternal());
        }
    }

    return result;
}

const VkAccelerationStructureKHR* TopLevelAccelerationStructureKHR::getPtr (void) const
{
    return &m_accelerationStructureKHR.get();
}

void TopLevelAccelerationStructureKHR::prepareInstances (const DeviceInterface& vk,
                                                         const VkDevice device,
                                                         VkAccelerationStructureGeometryKHR& accelerationStructureGeometryKHR,
                                                         std::vector<deUint32>& maxPrimitiveCounts)
{
    maxPrimitiveCounts.resize(1);
    maxPrimitiveCounts[0] = static_cast<deUint32>(m_bottomLevelInstances.size());

    VkDeviceOrHostAddressConstKHR instancesData;
    if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        if (m_instanceBuffer.get() != DE_NULL)
        {
            if (m_useArrayOfPointers)
            {
                deUint8*                      bufferStart   = static_cast<deUint8*>(m_instanceAddressBuffer->getAllocation().getHostPtr());
                VkDeviceSize                  bufferOffset  = 0;
                VkDeviceOrHostAddressConstKHR firstInstance = makeDeviceOrHostAddressConstKHR(vk, device, m_instanceBuffer->get(), 0);
                for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
                {
                    VkDeviceOrHostAddressConstKHR currentInstance;
                    currentInstance.deviceAddress = firstInstance.deviceAddress + instanceNdx * sizeof(VkAccelerationStructureInstanceKHR);

                    deMemcpy(&bufferStart[bufferOffset], &currentInstance, sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress));
                    bufferOffset += sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress);
                }
                flushMappedMemoryRange(vk, device, m_instanceAddressBuffer->getAllocation().getMemory(), m_instanceAddressBuffer->getAllocation().getOffset(), VK_WHOLE_SIZE);

                instancesData = makeDeviceOrHostAddressConstKHR(vk, device, m_instanceAddressBuffer->get(), 0);
            }
            else
                instancesData = makeDeviceOrHostAddressConstKHR(vk, device, m_instanceBuffer->get(), 0);
        }
        else
            instancesData = makeDeviceOrHostAddressConstKHR(DE_NULL);
    }
    else
    {
        if (m_instanceBuffer.get() != DE_NULL)
        {
            if (m_useArrayOfPointers)
            {
                deUint8*     bufferStart  = static_cast<deUint8*>(m_instanceAddressBuffer->getAllocation().getHostPtr());
                VkDeviceSize bufferOffset = 0;
                for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
                {
                    VkDeviceOrHostAddressConstKHR currentInstance;
                    currentInstance.hostAddress = (deUint8*)m_instanceBuffer->getAllocation().getHostPtr() + instanceNdx * sizeof(VkAccelerationStructureInstanceKHR);

                    deMemcpy(&bufferStart[bufferOffset], &currentInstance, sizeof(VkDeviceOrHostAddressConstKHR::hostAddress));
                    bufferOffset += sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
                }
                instancesData = makeDeviceOrHostAddressConstKHR(m_instanceAddressBuffer->getAllocation().getHostPtr());
            }
            else
                instancesData = makeDeviceOrHostAddressConstKHR(m_instanceBuffer->getAllocation().getHostPtr());
        }
        else
            instancesData = makeDeviceOrHostAddressConstKHR(DE_NULL);
    }

    VkAccelerationStructureGeometryInstancesDataKHR accelerationStructureGeometryInstancesDataKHR =
    {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, // VkStructureType               sType;
        DE_NULL,                                                              // const void*                   pNext;
        (VkBool32)(m_useArrayOfPointers ? DE_TRUE : DE_FALSE),                // VkBool32                      arrayOfPointers;
        instancesData                                                         // VkDeviceOrHostAddressConstKHR data;
    };

    accelerationStructureGeometryKHR =
    {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,                                      // VkStructureType                        sType;
        DE_NULL,                                                                                    // const void*                            pNext;
        VK_GEOMETRY_TYPE_INSTANCES_KHR,                                                             // VkGeometryTypeKHR                      geometryType;
        makeVkAccelerationStructureInstancesDataKHR(accelerationStructureGeometryInstancesDataKHR), // VkAccelerationStructureGeometryDataKHR geometry;
        (VkGeometryFlagsKHR)0u                                                                      // VkGeometryFlagsKHR                     flags;
    };
}

deUint32 TopLevelAccelerationStructure::getRequiredAllocationCount (void)
{
    return TopLevelAccelerationStructureKHR::getRequiredAllocationCount();
}

de::MovePtr<TopLevelAccelerationStructure> makeTopLevelAccelerationStructure ()
{
    return de::MovePtr<TopLevelAccelerationStructure>(new TopLevelAccelerationStructureKHR);
}

bool queryAccelerationStructureSizeKHR (const DeviceInterface& vk,
                                        const VkDevice device,
                                        const VkCommandBuffer cmdBuffer,
                                        const std::vector<VkAccelerationStructureKHR>& accelerationStructureHandles,
                                        VkAccelerationStructureBuildTypeKHR buildType,
                                        const VkQueryPool queryPool,
                                        VkQueryType queryType,
                                        deUint32 firstQuery,
                                        std::vector<VkDeviceSize>& results)
{
    DE_ASSERT(queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR || queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR);

    if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        // queryPool must be large enough to contain at least (firstQuery + accelerationStructureHandles.size()) queries.
        vk.cmdResetQueryPool(cmdBuffer, queryPool, firstQuery, deUint32(accelerationStructureHandles.size()));
        vk.cmdWriteAccelerationStructuresPropertiesKHR(cmdBuffer, deUint32(accelerationStructureHandles.size()), accelerationStructureHandles.data(), queryType, queryPool, firstQuery);
        // The results cannot be read back on the CPU at this point - they must be fetched with vkGetQueryPoolResults
        // after cmdBuffer has executed. Meanwhile the function fills the results vector with zeros.
        results.resize(accelerationStructureHandles.size(), 0u);
        return false;
    }

    // buildType != VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR
    results.resize(accelerationStructureHandles.size(), 0u);
    vk.writeAccelerationStructuresPropertiesKHR(device, deUint32(accelerationStructureHandles.size()), accelerationStructureHandles.data(), queryType,
                                                sizeof(VkDeviceSize) * accelerationStructureHandles.size(), results.data(), sizeof(VkDeviceSize));
    // results now contains the final values.
    return true;
}

bool queryAccelerationStructureSize (const DeviceInterface& vk,
                                     const VkDevice device,
                                     const VkCommandBuffer cmdBuffer,
                                     const std::vector<VkAccelerationStructureKHR>& accelerationStructureHandles,
                                     VkAccelerationStructureBuildTypeKHR buildType,
                                     const VkQueryPool queryPool,
                                     VkQueryType queryType,
                                     deUint32 firstQuery,
                                     std::vector<VkDeviceSize>& results)
{
    return queryAccelerationStructureSizeKHR(vk, device, cmdBuffer, accelerationStructureHandles, buildType, queryPool, queryType, firstQuery, results);
}

RayTracingPipeline::RayTracingPipeline ()
    : m_shadersModules          ()
    , m_pipelineLibraries       ()
    , m_shaderCreateInfos       ()
    , m_shadersGroupCreateInfos ()
    , m_pipelineCreateFlags     (0U)
    , m_pipelineCreateFlags2    (0U)
    , m_maxRecursionDepth       (1U)
    , m_maxPayloadSize          (0U)
    , m_maxAttributeSize        (0U)
    , m_deferredOperation       (false)
    , m_workerThreadCount       (0)
{
}

RayTracingPipeline::~RayTracingPipeline ()
{
}

#define CHECKED_ASSIGN_SHADER(SHADER, STAGE) \
    if (SHADER == VK_SHADER_UNUSED_KHR)      \
        SHADER = STAGE;                      \
    else                                     \
        TCU_THROW(InternalError, "Attempt to reassign shader")
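
// CHECKED_ASSIGN_SHADER guards against routing two shaders of the same kind into one
// group: a slot may only be written while it still holds VK_SHADER_UNUSED_KHR;
// otherwise the test fails with an InternalError instead of silently overwriting a
// previously registered stage.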
3467
addShader(VkShaderStageFlagBits shaderStage,Move<VkShaderModule> shaderModule,deUint32 group,const VkSpecializationInfo * specializationInfo,const VkPipelineShaderStageCreateFlags pipelineShaderStageCreateFlags,const void * pipelineShaderStageCreateInfopNext)3468 void RayTracingPipeline::addShader (VkShaderStageFlagBits shaderStage,
3469 Move<VkShaderModule> shaderModule,
3470 deUint32 group,
3471 const VkSpecializationInfo* specializationInfo,
3472 const VkPipelineShaderStageCreateFlags pipelineShaderStageCreateFlags,
3473 const void* pipelineShaderStageCreateInfopNext)
3474 {
3475 addShader(shaderStage, makeVkSharedPtr(shaderModule), group, specializationInfo, pipelineShaderStageCreateFlags, pipelineShaderStageCreateInfopNext);
3476 }
3477
addShader(VkShaderStageFlagBits shaderStage,de::SharedPtr<Move<VkShaderModule>> shaderModule,deUint32 group,const VkSpecializationInfo * specializationInfoPtr,const VkPipelineShaderStageCreateFlags pipelineShaderStageCreateFlags,const void * pipelineShaderStageCreateInfopNext)3478 void RayTracingPipeline::addShader (VkShaderStageFlagBits shaderStage,
3479 de::SharedPtr<Move<VkShaderModule>> shaderModule,
3480 deUint32 group,
3481 const VkSpecializationInfo* specializationInfoPtr,
3482 const VkPipelineShaderStageCreateFlags pipelineShaderStageCreateFlags,
3483 const void* pipelineShaderStageCreateInfopNext)
3484 {
3485 addShader(shaderStage, **shaderModule, group, specializationInfoPtr, pipelineShaderStageCreateFlags, pipelineShaderStageCreateInfopNext);
3486 m_shadersModules.push_back(shaderModule);
3487 }
3488
void RayTracingPipeline::addShader (VkShaderStageFlagBits shaderStage,
                                    VkShaderModule shaderModule,
                                    deUint32 group,
                                    const VkSpecializationInfo* specializationInfoPtr,
                                    const VkPipelineShaderStageCreateFlags pipelineShaderStageCreateFlags,
                                    const void* pipelineShaderStageCreateInfopNext)
{
    // Groups are created on demand: adding a shader to group N materializes all groups up
    // to N with empty (VK_SHADER_UNUSED_KHR) slots and an as-yet undetermined type.
    if (group >= m_shadersGroupCreateInfos.size())
    {
        for (size_t groupNdx = m_shadersGroupCreateInfos.size(); groupNdx <= group; ++groupNdx)
        {
            VkRayTracingShaderGroupCreateInfoKHR shaderGroupCreateInfo =
            {
                VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR,    // VkStructureType                  sType;
                DE_NULL,                                                       // const void*                      pNext;
                VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR,                 // VkRayTracingShaderGroupTypeKHR   type;
                VK_SHADER_UNUSED_KHR,                                          // deUint32                         generalShader;
                VK_SHADER_UNUSED_KHR,                                          // deUint32                         closestHitShader;
                VK_SHADER_UNUSED_KHR,                                          // deUint32                         anyHitShader;
                VK_SHADER_UNUSED_KHR,                                          // deUint32                         intersectionShader;
                DE_NULL,                                                       // const void*                      pShaderGroupCaptureReplayHandle;
            };

            m_shadersGroupCreateInfos.push_back(shaderGroupCreateInfo);
        }
    }

    const deUint32 shaderStageNdx = (deUint32)m_shaderCreateInfos.size();
    VkRayTracingShaderGroupCreateInfoKHR& shaderGroupCreateInfo = m_shadersGroupCreateInfos[group];

    // Route the new stage into the matching slot of its group; CHECKED_ASSIGN_SHADER
    // guards against assigning the same slot twice.
    switch (shaderStage)
    {
        case VK_SHADER_STAGE_RAYGEN_BIT_KHR:       CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader,      shaderStageNdx); break;
        case VK_SHADER_STAGE_MISS_BIT_KHR:         CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader,      shaderStageNdx); break;
        case VK_SHADER_STAGE_CALLABLE_BIT_KHR:     CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader,      shaderStageNdx); break;
        case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:      CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.anyHitShader,       shaderStageNdx); break;
        case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:  CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.closestHitShader,   shaderStageNdx); break;
        case VK_SHADER_STAGE_INTERSECTION_BIT_KHR: CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.intersectionShader, shaderStageNdx); break;
        default:                                   TCU_THROW(InternalError, "Unacceptable stage");
    }

    // Derive the group type from the stages it contains: general stages must not mix with
    // hit-group stages, and an intersection shader makes the hit group procedural instead
    // of triangle-based.
    switch (shaderStage)
    {
        case VK_SHADER_STAGE_RAYGEN_BIT_KHR:
        case VK_SHADER_STAGE_MISS_BIT_KHR:
        case VK_SHADER_STAGE_CALLABLE_BIT_KHR:
        {
            DE_ASSERT(shaderGroupCreateInfo.type == VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR);
            shaderGroupCreateInfo.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR;

            break;
        }

        case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:
        case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:
        case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:
        {
            DE_ASSERT(shaderGroupCreateInfo.type != VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR);
            shaderGroupCreateInfo.type = (shaderGroupCreateInfo.intersectionShader == VK_SHADER_UNUSED_KHR)
                                         ? VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR
                                         : VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR;

            break;
        }

        default: TCU_THROW(InternalError, "Unacceptable stage");
    }

    {
        const VkPipelineShaderStageCreateInfo shaderCreateInfo =
        {
            VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,    // VkStructureType                  sType;
            pipelineShaderStageCreateInfopNext,                     // const void*                      pNext;
            pipelineShaderStageCreateFlags,                         // VkPipelineShaderStageCreateFlags flags;
            shaderStage,                                            // VkShaderStageFlagBits            stage;
            shaderModule,                                           // VkShaderModule                   module;
            "main",                                                 // const char*                      pName;
            specializationInfoPtr,                                  // const VkSpecializationInfo*      pSpecializationInfo;
        };

        m_shaderCreateInfos.push_back(shaderCreateInfo);
    }
}
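
// Usage sketch (illustrative only; the module names are placeholders and the call form
// assumes the defaulted trailing parameters declared in the header): general stages
// occupy the generalShader slot of their group, while hit-group stages each fill a
// dedicated slot, so several stages may share one group:
//
//   RayTracingPipeline pipeline;
//   pipeline.addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,      rgenModule, 0u); // group 0: general
//   pipeline.addShader(VK_SHADER_STAGE_MISS_BIT_KHR,        missModule, 1u); // group 1: general
//   pipeline.addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, chitModule, 2u); // group 2: triangles hit group
//   pipeline.addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR,     ahitModule, 2u); // group 2: anyHitShader slot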

void RayTracingPipeline::setGroupCaptureReplayHandle (uint32_t group, const void* pShaderGroupCaptureReplayHandle)
{
    DE_ASSERT(static_cast<size_t>(group) < m_shadersGroupCreateInfos.size());
    m_shadersGroupCreateInfos[group].pShaderGroupCaptureReplayHandle = pShaderGroupCaptureReplayHandle;
}

void RayTracingPipeline::addLibrary (de::SharedPtr<de::MovePtr<RayTracingPipeline>> pipelineLibrary)
{
    m_pipelineLibraries.push_back(pipelineLibrary);
}

uint32_t RayTracingPipeline::getShaderGroupCount (void)
{
    return de::sizeU32(m_shadersGroupCreateInfos);
}

uint32_t RayTracingPipeline::getFullShaderGroupCount (void)
{
    uint32_t totalCount = getShaderGroupCount();

    for (const auto& lib : m_pipelineLibraries)
        totalCount += lib->get()->getFullShaderGroupCount();

    return totalCount;
}

Move<VkPipeline> RayTracingPipeline::createPipelineKHR (const DeviceInterface& vk,
                                                        const VkDevice device,
                                                        const VkPipelineLayout pipelineLayout,
                                                        const std::vector<VkPipeline>& pipelineLibraries,
                                                        const VkPipelineCache pipelineCache)
{
    for (size_t groupNdx = 0; groupNdx < m_shadersGroupCreateInfos.size(); ++groupNdx)
        DE_ASSERT(m_shadersGroupCreateInfos[groupNdx].sType == VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR);

    VkPipelineLibraryCreateInfoKHR librariesCreateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR,    // VkStructureType sType;
        DE_NULL,                                               // const void*     pNext;
        de::sizeU32(pipelineLibraries),                        // deUint32        libraryCount;
        de::dataOrNull(pipelineLibraries)                      // VkPipeline*     pLibraries;
    };
    const VkRayTracingPipelineInterfaceCreateInfoKHR pipelineInterfaceCreateInfo =
    {
        VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_INTERFACE_CREATE_INFO_KHR,    // VkStructureType sType;
        DE_NULL,                                                             // const void*     pNext;
        m_maxPayloadSize,                                                    // deUint32        maxPayloadSize;
        m_maxAttributeSize                                                   // deUint32        maxAttributeSize;
    };
    const bool addPipelineInterfaceCreateInfo = m_maxPayloadSize != 0 || m_maxAttributeSize != 0;
    const VkRayTracingPipelineInterfaceCreateInfoKHR* pipelineInterfaceCreateInfoPtr = addPipelineInterfaceCreateInfo ? &pipelineInterfaceCreateInfo : DE_NULL;
    const VkPipelineLibraryCreateInfoKHR* librariesCreateInfoPtr = (pipelineLibraries.empty() ? nullptr : &librariesCreateInfo);

    Move<VkDeferredOperationKHR> deferredOperation;
    if (m_deferredOperation)
        deferredOperation = createDeferredOperationKHR(vk, device);

    VkPipelineDynamicStateCreateInfo dynamicStateCreateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,    // VkStructureType                   sType;
        DE_NULL,                                                 // const void*                       pNext;
        0,                                                       // VkPipelineDynamicStateCreateFlags flags;
        static_cast<deUint32>(m_dynamicStates.size()),           // deUint32                          dynamicStateCount;
        m_dynamicStates.data(),                                  // const VkDynamicState*             pDynamicStates;
    };

    VkRayTracingPipelineCreateInfoKHR pipelineCreateInfo
    {
        VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR,    // VkStructureType                             sType;
        DE_NULL,                                                   // const void*                                 pNext;
        m_pipelineCreateFlags,                                     // VkPipelineCreateFlags                       flags;
        de::sizeU32(m_shaderCreateInfos),                          // deUint32                                    stageCount;
        de::dataOrNull(m_shaderCreateInfos),                       // const VkPipelineShaderStageCreateInfo*      pStages;
        de::sizeU32(m_shadersGroupCreateInfos),                    // deUint32                                    groupCount;
        de::dataOrNull(m_shadersGroupCreateInfos),                 // const VkRayTracingShaderGroupCreateInfoKHR* pGroups;
        m_maxRecursionDepth,                                       // deUint32                                    maxRecursionDepth;
        librariesCreateInfoPtr,                                    // VkPipelineLibraryCreateInfoKHR*             pLibraryInfo;
        pipelineInterfaceCreateInfoPtr,                            // VkRayTracingPipelineInterfaceCreateInfoKHR* pLibraryInterface;
        &dynamicStateCreateInfo,                                   // const VkPipelineDynamicStateCreateInfo*     pDynamicState;
        pipelineLayout,                                            // VkPipelineLayout                            layout;
        (VkPipeline)DE_NULL,                                       // VkPipeline                                  basePipelineHandle;
        0,                                                         // deInt32                                     basePipelineIndex;
    };

    // When create flags 2 are requested, they must be chained into pNext before the
    // pipeline is created, and the legacy flags field must then be zero.
    VkPipelineCreateFlags2CreateInfoKHR pipelineFlags2CreateInfo = initVulkanStructure();
    if (m_pipelineCreateFlags2)
    {
        pipelineFlags2CreateInfo.flags = m_pipelineCreateFlags2;
        pipelineCreateInfo.pNext = &pipelineFlags2CreateInfo;
        pipelineCreateInfo.flags = 0;
    }

    VkPipeline object = DE_NULL;
    VkResult result = vk.createRayTracingPipelinesKHR(device, deferredOperation.get(), pipelineCache, 1u, &pipelineCreateInfo, DE_NULL, &object);
    const bool allowCompileRequired = ((m_pipelineCreateFlags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) != 0);

    if (m_deferredOperation)
    {
        DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS || (allowCompileRequired && result == VK_PIPELINE_COMPILE_REQUIRED));
        finishDeferredOperation(vk, device, deferredOperation.get(), m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
    }

    if (allowCompileRequired && result == VK_PIPELINE_COMPILE_REQUIRED)
        throw CompileRequiredError("createRayTracingPipelinesKHR returned VK_PIPELINE_COMPILE_REQUIRED");

    Move<VkPipeline> pipeline (check<VkPipeline>(object), Deleter<VkPipeline>(vk, device, DE_NULL));
    return pipeline;
}
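
// A minimal sketch of the host-deferred path (illustrative; assumes the defaulted
// library argument declared in the header): enabling deferred operations before
// creation makes createPipelineKHR() drive the compile through
// finishDeferredOperation() on the requested number of worker threads (0 lets the
// implementation decide):
//
//   pipeline.setDeferredOperation(true, 4u /*worker threads*/);
//   const Move<VkPipeline> object = pipeline.createPipeline(vk, device, *pipelineLayout);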

Move<VkPipeline> RayTracingPipeline::createPipeline (const DeviceInterface& vk,
                                                     const VkDevice device,
                                                     const VkPipelineLayout pipelineLayout,
                                                     const std::vector<de::SharedPtr<Move<VkPipeline>>>& pipelineLibraries)
{
    std::vector<VkPipeline> rawPipelines;
    rawPipelines.reserve(pipelineLibraries.size());
    for (const auto& lib : pipelineLibraries)
        rawPipelines.push_back(lib.get()->get());

    return createPipelineKHR(vk, device, pipelineLayout, rawPipelines);
}

Move<VkPipeline> RayTracingPipeline::createPipeline (const DeviceInterface& vk,
                                                     const VkDevice device,
                                                     const VkPipelineLayout pipelineLayout,
                                                     const std::vector<VkPipeline>& pipelineLibraries,
                                                     const VkPipelineCache pipelineCache)
{
    return createPipelineKHR(vk, device, pipelineLayout, pipelineLibraries, pipelineCache);
}

std::vector<de::SharedPtr<Move<VkPipeline>>> RayTracingPipeline::createPipelineWithLibraries (const DeviceInterface& vk,
                                                                                              const VkDevice device,
                                                                                              const VkPipelineLayout pipelineLayout)
{
    for (size_t groupNdx = 0; groupNdx < m_shadersGroupCreateInfos.size(); ++groupNdx)
        DE_ASSERT(m_shadersGroupCreateInfos[groupNdx].sType == VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR);

    DE_ASSERT(m_shaderCreateInfos.size() > 0);
    DE_ASSERT(m_shadersGroupCreateInfos.size() > 0);

    std::vector<de::SharedPtr<Move<VkPipeline>>> result, allLibraries, firstLibraries;
    for (auto it = begin(m_pipelineLibraries), eit = end(m_pipelineLibraries); it != eit; ++it)
    {
        auto childLibraries = (*it)->get()->createPipelineWithLibraries(vk, device, pipelineLayout);
        DE_ASSERT(childLibraries.size() > 0);
        firstLibraries.push_back(childLibraries[0]);
        std::copy(begin(childLibraries), end(childLibraries), std::back_inserter(allLibraries));
    }
    result.push_back(makeVkSharedPtr(createPipeline(vk, device, pipelineLayout, firstLibraries)));
    std::copy(begin(allLibraries), end(allLibraries), std::back_inserter(result));
    return result;
}
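
// Note: index 0 of the returned vector is the executable pipeline linked against the
// direct child libraries; the remaining entries keep every recursively created library
// alive for as long as the caller holds the vector.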

std::vector<uint8_t> RayTracingPipeline::getShaderGroupHandles (const DeviceInterface& vk,
                                                                const VkDevice device,
                                                                const VkPipeline pipeline,
                                                                const deUint32 shaderGroupHandleSize,
                                                                const deUint32 firstGroup,
                                                                const deUint32 groupCount) const
{
    const auto handleArraySizeBytes = groupCount * shaderGroupHandleSize;
    std::vector<uint8_t> shaderHandles (handleArraySizeBytes);

    VK_CHECK(getRayTracingShaderGroupHandles(vk, device, pipeline,
                                             firstGroup, groupCount,
                                             static_cast<uintptr_t>(shaderHandles.size()), de::dataOrNull(shaderHandles)));

    return shaderHandles;
}

std::vector<uint8_t> RayTracingPipeline::getShaderGroupReplayHandles (const DeviceInterface& vk,
                                                                      const VkDevice device,
                                                                      const VkPipeline pipeline,
                                                                      const deUint32 shaderGroupHandleReplaySize,
                                                                      const deUint32 firstGroup,
                                                                      const deUint32 groupCount) const
{
    const auto handleArraySizeBytes = groupCount * shaderGroupHandleReplaySize;
    std::vector<uint8_t> shaderHandles (handleArraySizeBytes);

    VK_CHECK(getRayTracingCaptureReplayShaderGroupHandles(vk, device, pipeline,
                                                          firstGroup, groupCount,
                                                          static_cast<uintptr_t>(shaderHandles.size()), de::dataOrNull(shaderHandles)));

    return shaderHandles;
}
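
// Capture/replay sketch (illustrative; capturePipeline, replayPipeline and
// replayHandleSize are placeholders): the capture run records the replay handles, and a
// later run feeds them back per group before creating the replay pipeline:
//
//   const auto replayHandles = pipeline.getShaderGroupReplayHandles(vk, device, *capturePipeline,
//                                                                   replayHandleSize, 0u /*firstGroup*/, 1u /*groupCount*/);
//   replayPipeline.setCreateFlags(VK_PIPELINE_CREATE_RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR);
//   replayPipeline.setGroupCaptureReplayHandle(0u, replayHandles.data());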

de::MovePtr<BufferWithMemory> RayTracingPipeline::createShaderBindingTable (const DeviceInterface& vk,
                                                                            const VkDevice device,
                                                                            const VkPipeline pipeline,
                                                                            Allocator& allocator,
                                                                            const deUint32& shaderGroupHandleSize,
                                                                            const deUint32 shaderGroupBaseAlignment,
                                                                            const deUint32& firstGroup,
                                                                            const deUint32& groupCount,
                                                                            const VkBufferCreateFlags& additionalBufferCreateFlags,
                                                                            const VkBufferUsageFlags& additionalBufferUsageFlags,
                                                                            const MemoryRequirement& additionalMemoryRequirement,
                                                                            const VkDeviceAddress& opaqueCaptureAddress,
                                                                            const deUint32 shaderBindingTableOffset,
                                                                            const deUint32 shaderRecordSize,
                                                                            const void** shaderGroupDataPtrPerGroup,
                                                                            const bool autoAlignRecords)
{
    const auto shaderHandles = getShaderGroupHandles(vk, device, pipeline, shaderGroupHandleSize, firstGroup, groupCount);
    return createShaderBindingTable(vk, device, allocator,
                                    shaderGroupHandleSize, shaderGroupBaseAlignment, shaderHandles,
                                    additionalBufferCreateFlags, additionalBufferUsageFlags, additionalMemoryRequirement,
                                    opaqueCaptureAddress,
                                    shaderBindingTableOffset, shaderRecordSize, shaderGroupDataPtrPerGroup,
                                    autoAlignRecords);
}

de::MovePtr<BufferWithMemory> RayTracingPipeline::createShaderBindingTable (const DeviceInterface& vk,
                                                                            const VkDevice device,
                                                                            Allocator& allocator,
                                                                            const deUint32 shaderGroupHandleSize,
                                                                            const deUint32 shaderGroupBaseAlignment,
                                                                            const std::vector<uint8_t>& shaderHandles,
                                                                            const VkBufferCreateFlags additionalBufferCreateFlags,
                                                                            const VkBufferUsageFlags additionalBufferUsageFlags,
                                                                            const MemoryRequirement& additionalMemoryRequirement,
                                                                            const VkDeviceAddress opaqueCaptureAddress,
                                                                            const deUint32 shaderBindingTableOffset,
                                                                            const deUint32 shaderRecordSize,
                                                                            const void** shaderGroupDataPtrPerGroup,
                                                                            const bool autoAlignRecords)
{
    DE_ASSERT(shaderGroupBaseAlignment != 0u);
    DE_ASSERT((shaderBindingTableOffset % shaderGroupBaseAlignment) == 0);
    DE_UNREF(shaderGroupBaseAlignment);

    const auto groupCount = de::sizeU32(shaderHandles) / shaderGroupHandleSize;
    const auto totalEntrySize = (autoAlignRecords ? (deAlign32(shaderGroupHandleSize + shaderRecordSize, shaderGroupHandleSize)) : (shaderGroupHandleSize + shaderRecordSize));
    const deUint32 sbtSize = shaderBindingTableOffset + groupCount * totalEntrySize;
    const VkBufferUsageFlags sbtFlags = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | additionalBufferUsageFlags;
    VkBufferCreateInfo sbtCreateInfo = makeBufferCreateInfo(sbtSize, sbtFlags);
    sbtCreateInfo.flags |= additionalBufferCreateFlags;
    VkBufferUsageFlags2CreateInfoKHR bufferUsageFlags2 = vk::initVulkanStructure();
    VkBufferOpaqueCaptureAddressCreateInfo sbtCaptureAddressInfo =
    {
        VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO,    // VkStructureType sType;
        DE_NULL,                                                        // const void*     pNext;
        deUint64(opaqueCaptureAddress)                                  // deUint64        opaqueCaptureAddress;
    };

    // When maintenance5 is being tested, m_pipelineCreateFlags2 is non-zero and the
    // buffer usage must be supplied through VkBufferUsageFlags2CreateInfoKHR instead of
    // the legacy usage field.
    if (m_pipelineCreateFlags2)
    {
        bufferUsageFlags2.usage = (VkBufferUsageFlags2KHR)sbtFlags;
        sbtCreateInfo.pNext = &bufferUsageFlags2;
        sbtCreateInfo.usage = 0;
    }

    if (opaqueCaptureAddress != 0u)
    {
        sbtCreateInfo.pNext = &sbtCaptureAddressInfo;
        sbtCreateInfo.flags |= VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
    }
    const MemoryRequirement sbtMemRequirements = MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress | additionalMemoryRequirement;
    de::MovePtr<BufferWithMemory> sbtBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, sbtCreateInfo, sbtMemRequirements));
    vk::Allocation& sbtAlloc = sbtBuffer->getAllocation();

    // Copy handles to the table, leaving space for ShaderRecordKHR data after each handle.
    deUint8* shaderBegin = (deUint8*)sbtAlloc.getHostPtr() + shaderBindingTableOffset;
    for (deUint32 idx = 0; idx < groupCount; ++idx)
    {
        const deUint8* shaderSrcPos = shaderHandles.data() + idx * shaderGroupHandleSize;
        deUint8* shaderDstPos = shaderBegin + idx * totalEntrySize;
        deMemcpy(shaderDstPos, shaderSrcPos, shaderGroupHandleSize);

        if (shaderGroupDataPtrPerGroup != nullptr &&
            shaderGroupDataPtrPerGroup[idx] != nullptr)
        {
            DE_ASSERT(sbtSize >= static_cast<deUint32>(shaderDstPos - shaderBegin) + shaderGroupHandleSize);

            deMemcpy(shaderDstPos + shaderGroupHandleSize,
                     shaderGroupDataPtrPerGroup[idx],
                     shaderRecordSize);
        }
    }

    flushMappedMemoryRange(vk, device, sbtAlloc.getMemory(), sbtAlloc.getOffset(), VK_WHOLE_SIZE);

    return sbtBuffer;
}
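
// Layout sketch: starting at shaderBindingTableOffset the buffer holds one entry of
// totalEntrySize bytes per group (the group handle, optionally followed by shader record
// data). For example, with a 32-byte handle, a 16-byte record and autoAlignRecords:
//
//   totalEntrySize = deAlign32(32 + 16, 32) = 64
//   entry N starts at shaderBindingTableOffset + N * 64
//
// which keeps every record aligned to the handle size; the same stride must later be
// reported in the VkStridedDeviceAddressRegionKHR that points at this table.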

void RayTracingPipeline::setCreateFlags (const VkPipelineCreateFlags& pipelineCreateFlags)
{
    m_pipelineCreateFlags = pipelineCreateFlags;
}

void RayTracingPipeline::setCreateFlags2 (const VkPipelineCreateFlags2KHR& pipelineCreateFlags2)
{
    m_pipelineCreateFlags2 = pipelineCreateFlags2;
}

void RayTracingPipeline::setMaxRecursionDepth (const deUint32& maxRecursionDepth)
{
    m_maxRecursionDepth = maxRecursionDepth;
}

void RayTracingPipeline::setMaxPayloadSize (const deUint32& maxPayloadSize)
{
    m_maxPayloadSize = maxPayloadSize;
}

void RayTracingPipeline::setMaxAttributeSize (const deUint32& maxAttributeSize)
{
    m_maxAttributeSize = maxAttributeSize;
}

void RayTracingPipeline::setDeferredOperation (const bool deferredOperation,
                                               const deUint32 workerThreadCount)
{
    m_deferredOperation = deferredOperation;
    m_workerThreadCount = workerThreadCount;
}

void RayTracingPipeline::addDynamicState (const VkDynamicState& dynamicState)
{
    m_dynamicStates.push_back(dynamicState);
}

class RayTracingPropertiesKHR : public RayTracingProperties
{
public:
    RayTracingPropertiesKHR () = delete;
    RayTracingPropertiesKHR (const InstanceInterface& vki,
                             const VkPhysicalDevice physicalDevice);
    virtual ~RayTracingPropertiesKHR ();

    uint32_t getShaderGroupHandleSize (void) override { return m_rayTracingPipelineProperties.shaderGroupHandleSize; }
    uint32_t getShaderGroupHandleAlignment (void) override { return m_rayTracingPipelineProperties.shaderGroupHandleAlignment; }
    uint32_t getShaderGroupHandleCaptureReplaySize (void) override { return m_rayTracingPipelineProperties.shaderGroupHandleCaptureReplaySize; }
    uint32_t getMaxRecursionDepth (void) override { return m_rayTracingPipelineProperties.maxRayRecursionDepth; }
    uint32_t getMaxShaderGroupStride (void) override { return m_rayTracingPipelineProperties.maxShaderGroupStride; }
    uint32_t getShaderGroupBaseAlignment (void) override { return m_rayTracingPipelineProperties.shaderGroupBaseAlignment; }
    uint64_t getMaxGeometryCount (void) override { return m_accelerationStructureProperties.maxGeometryCount; }
    uint64_t getMaxInstanceCount (void) override { return m_accelerationStructureProperties.maxInstanceCount; }
    uint64_t getMaxPrimitiveCount (void) override { return m_accelerationStructureProperties.maxPrimitiveCount; }
    uint32_t getMaxDescriptorSetAccelerationStructures (void) override { return m_accelerationStructureProperties.maxDescriptorSetAccelerationStructures; }
    uint32_t getMaxRayDispatchInvocationCount (void) override { return m_rayTracingPipelineProperties.maxRayDispatchInvocationCount; }
    uint32_t getMaxRayHitAttributeSize (void) override { return m_rayTracingPipelineProperties.maxRayHitAttributeSize; }
    uint32_t getMaxMemoryAllocationCount (void) override { return m_maxMemoryAllocationCount; }

protected:
    VkPhysicalDeviceAccelerationStructurePropertiesKHR m_accelerationStructureProperties;
    VkPhysicalDeviceRayTracingPipelinePropertiesKHR m_rayTracingPipelineProperties;
    deUint32 m_maxMemoryAllocationCount;
};

RayTracingPropertiesKHR::~RayTracingPropertiesKHR ()
{
}

RayTracingPropertiesKHR::RayTracingPropertiesKHR (const InstanceInterface& vki,
                                                  const VkPhysicalDevice physicalDevice)
    : RayTracingProperties (vki, physicalDevice)
{
    // getPhysicalDeviceExtensionProperties() deduces the queried extension structure from
    // the assignment target and fills it via vkGetPhysicalDeviceProperties2().
    m_accelerationStructureProperties = getPhysicalDeviceExtensionProperties(vki, physicalDevice);
    m_rayTracingPipelineProperties = getPhysicalDeviceExtensionProperties(vki, physicalDevice);
    m_maxMemoryAllocationCount = getPhysicalDeviceProperties(vki, physicalDevice).limits.maxMemoryAllocationCount;
}

de::MovePtr<RayTracingProperties> makeRayTracingProperties (const InstanceInterface& vki,
                                                            const VkPhysicalDevice physicalDevice)
{
    return de::MovePtr<RayTracingProperties>(new RayTracingPropertiesKHR(vki, physicalDevice));
}
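
// Usage sketch (illustrative): properties are typically queried once and reused when
// sizing shader binding tables:
//
//   const auto rtProperties  = makeRayTracingProperties(vki, physicalDevice);
//   const auto handleSize    = rtProperties->getShaderGroupHandleSize();
//   const auto baseAlignment = rtProperties->getShaderGroupBaseAlignment();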

static inline void cmdTraceRaysKHR (const DeviceInterface& vk,
                                    VkCommandBuffer commandBuffer,
                                    const VkStridedDeviceAddressRegionKHR* raygenShaderBindingTableRegion,
                                    const VkStridedDeviceAddressRegionKHR* missShaderBindingTableRegion,
                                    const VkStridedDeviceAddressRegionKHR* hitShaderBindingTableRegion,
                                    const VkStridedDeviceAddressRegionKHR* callableShaderBindingTableRegion,
                                    deUint32 width,
                                    deUint32 height,
                                    deUint32 depth)
{
    return vk.cmdTraceRaysKHR(commandBuffer,
                              raygenShaderBindingTableRegion,
                              missShaderBindingTableRegion,
                              hitShaderBindingTableRegion,
                              callableShaderBindingTableRegion,
                              width,
                              height,
                              depth);
}

void cmdTraceRays (const DeviceInterface& vk,
                   VkCommandBuffer commandBuffer,
                   const VkStridedDeviceAddressRegionKHR* raygenShaderBindingTableRegion,
                   const VkStridedDeviceAddressRegionKHR* missShaderBindingTableRegion,
                   const VkStridedDeviceAddressRegionKHR* hitShaderBindingTableRegion,
                   const VkStridedDeviceAddressRegionKHR* callableShaderBindingTableRegion,
                   deUint32 width,
                   deUint32 height,
                   deUint32 depth)
{
    DE_ASSERT(raygenShaderBindingTableRegion != DE_NULL);
    DE_ASSERT(missShaderBindingTableRegion != DE_NULL);
    DE_ASSERT(hitShaderBindingTableRegion != DE_NULL);
    DE_ASSERT(callableShaderBindingTableRegion != DE_NULL);

    return cmdTraceRaysKHR(vk,
                           commandBuffer,
                           raygenShaderBindingTableRegion,
                           missShaderBindingTableRegion,
                           hitShaderBindingTableRegion,
                           callableShaderBindingTableRegion,
                           width,
                           height,
                           depth);
}
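
// Usage sketch (illustrative; raygenSBT, handleSize, cmdBuffer and the dispatch size are
// placeholders, and the SBT is assumed to come from createShaderBindingTable() above):
// regions are built from the table's device address, and an unused table is passed as a
// zeroed region rather than a null pointer:
//
//   const VkStridedDeviceAddressRegionKHR raygenRegion = makeStridedDeviceAddressRegionKHR(
//       getBufferDeviceAddress(vk, device, raygenSBT->get(), 0), handleSize, handleSize);
//   const VkStridedDeviceAddressRegionKHR emptyRegion = makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
//
//   cmdTraceRays(vk, *cmdBuffer, &raygenRegion, &emptyRegion, &emptyRegion, &emptyRegion, width, height, 1u);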

static inline void cmdTraceRaysIndirectKHR (const DeviceInterface& vk,
                                            VkCommandBuffer commandBuffer,
                                            const VkStridedDeviceAddressRegionKHR* raygenShaderBindingTableRegion,
                                            const VkStridedDeviceAddressRegionKHR* missShaderBindingTableRegion,
                                            const VkStridedDeviceAddressRegionKHR* hitShaderBindingTableRegion,
                                            const VkStridedDeviceAddressRegionKHR* callableShaderBindingTableRegion,
                                            VkDeviceAddress indirectDeviceAddress)
{
    DE_ASSERT(raygenShaderBindingTableRegion != DE_NULL);
    DE_ASSERT(missShaderBindingTableRegion != DE_NULL);
    DE_ASSERT(hitShaderBindingTableRegion != DE_NULL);
    DE_ASSERT(callableShaderBindingTableRegion != DE_NULL);
    DE_ASSERT(indirectDeviceAddress != 0);

    return vk.cmdTraceRaysIndirectKHR(commandBuffer,
                                      raygenShaderBindingTableRegion,
                                      missShaderBindingTableRegion,
                                      hitShaderBindingTableRegion,
                                      callableShaderBindingTableRegion,
                                      indirectDeviceAddress);
}

void cmdTraceRaysIndirect (const DeviceInterface& vk,
                           VkCommandBuffer commandBuffer,
                           const VkStridedDeviceAddressRegionKHR* raygenShaderBindingTableRegion,
                           const VkStridedDeviceAddressRegionKHR* missShaderBindingTableRegion,
                           const VkStridedDeviceAddressRegionKHR* hitShaderBindingTableRegion,
                           const VkStridedDeviceAddressRegionKHR* callableShaderBindingTableRegion,
                           VkDeviceAddress indirectDeviceAddress)
{
    return cmdTraceRaysIndirectKHR(vk,
                                   commandBuffer,
                                   raygenShaderBindingTableRegion,
                                   missShaderBindingTableRegion,
                                   hitShaderBindingTableRegion,
                                   callableShaderBindingTableRegion,
                                   indirectDeviceAddress);
}

static inline void cmdTraceRaysIndirect2KHR (const DeviceInterface& vk,
                                             VkCommandBuffer commandBuffer,
                                             VkDeviceAddress indirectDeviceAddress)
{
    DE_ASSERT(indirectDeviceAddress != 0);

    return vk.cmdTraceRaysIndirect2KHR(commandBuffer, indirectDeviceAddress);
}

void cmdTraceRaysIndirect2 (const DeviceInterface& vk,
                            VkCommandBuffer commandBuffer,
                            VkDeviceAddress indirectDeviceAddress)
{
    return cmdTraceRaysIndirect2KHR(vk, commandBuffer, indirectDeviceAddress);
}
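
// Note: unlike cmdTraceRaysIndirect(), the ray_tracing_maintenance1 variant above reads
// the four SBT regions as well as the dispatch dimensions from a
// VkTraceRaysIndirectCommand2KHR structure at indirectDeviceAddress, so no region
// pointers are supplied by the host.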

#else

deUint32 rayTracingDefineAnything()
{
    return 0;
}

#endif // CTS_USES_VULKANSC

} // vk