/*-------------------------------------------------------------------------
 * Vulkan CTS Framework
 * --------------------
 *
 * Copyright (c) 2020 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Vulkan ray tracing utilities
 *//*--------------------------------------------------------------------*/

#include "vkRayTracingUtil.hpp"

#include "vkRefUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkCmdUtil.hpp"

#include "deStringUtil.hpp"
#include "deSTLUtil.hpp"

#include <algorithm>
#include <vector>
#include <string>
#include <thread>
#include <limits>
#include <type_traits>
#include <map>

namespace vk
{

#ifndef CTS_USES_VULKANSC

struct DeferredThreadParams
{
    const DeviceInterface& vk;
    VkDevice               device;
    VkDeferredOperationKHR deferredOperation;
    VkResult               result;
};

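// Converts a VkFormat enum name such as VK_FORMAT_R32G32B32_SFLOAT into its
// lower-case suffix ("r32g32b32_sfloat"), e.g. for building test names.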
std::string getFormatSimpleName (vk::VkFormat format)
{
    constexpr size_t kPrefixLen = 10; // strlen("VK_FORMAT_")
    return de::toLower(de::toString(format).substr(kPrefixLen));
}

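// 2D point-in-triangle test on the XY components of the inputs. s and t are
// proportional to the barycentric coordinates of p, scaled by a, which is twice
// the signed area of the triangle; p is inside when s and t share a sign and
// their sum does not exceed a in magnitude.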
bool pointInTriangle2D (const tcu::Vec3& p, const tcu::Vec3& p0, const tcu::Vec3& p1, const tcu::Vec3& p2)
{
    float s = p0.y() * p2.x() - p0.x() * p2.y() + (p2.y() - p0.y()) * p.x() + (p0.x() - p2.x()) * p.y();
    float t = p0.x() * p1.y() - p0.y() * p1.x() + (p0.y() - p1.y()) * p.x() + (p1.x() - p0.x()) * p.y();

    if ((s < 0) != (t < 0))
        return false;

    float a = -p1.y() * p2.x() + p0.y() * (p2.x() - p1.x()) + p0.x() * (p1.y() - p2.y()) + p1.x() * p2.y();

    return a < 0 ?
            (s <= 0 && s + t >= a) :
            (s >= 0 && s + t <= a);
}

// Returns true if VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR needs to be supported for the given format.
static bool isMandatoryAccelerationStructureVertexBufferFormat (vk::VkFormat format)
{
    bool mandatory = false;

    switch (format)
    {
        case VK_FORMAT_R32G32_SFLOAT:
        case VK_FORMAT_R32G32B32_SFLOAT:
        case VK_FORMAT_R16G16_SFLOAT:
        case VK_FORMAT_R16G16B16A16_SFLOAT:
        case VK_FORMAT_R16G16_SNORM:
        case VK_FORMAT_R16G16B16A16_SNORM:
            mandatory = true;
            break;
        default:
            break;
    }

    return mandatory;
}

void checkAccelerationStructureVertexBufferFormat (const vk::InstanceInterface& vki, vk::VkPhysicalDevice physicalDevice, vk::VkFormat format)
{
    const vk::VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(vki, physicalDevice, format);

    if ((formatProperties.bufferFeatures & vk::VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR) == 0u)
    {
        const std::string errorMsg = "Format not supported for acceleration structure vertex buffers";
        if (isMandatoryAccelerationStructureVertexBufferFormat(format))
            TCU_FAIL(errorMsg);
        TCU_THROW(NotSupportedError, errorMsg);
    }
}

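// Returns GLSL source for a minimal ray generation shader: one ray is launched
// per invocation, through the centre of the corresponding launch-grid cell,
// pointing down the -Z axis with a [0.0, 9.0] t-range, and the payload is
// declared at location 0.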
std::string getCommonRayGenerationShader (void)
{
    return
        "#version 460 core\n"
        "#extension GL_EXT_ray_tracing : require\n"
        "layout(location = 0) rayPayloadEXT vec3 hitValue;\n"
        "layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n"
        "\n"
        "void main()\n"
        "{\n"
        "  uint rayFlags = 0;\n"
        "  uint cullMask = 0xFF;\n"
        "  float tmin = 0.0;\n"
        "  float tmax = 9.0;\n"
        "  vec3 origin = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
        "  vec3 direct = vec3(0.0, 0.0, -1.0);\n"
        "  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, 0);\n"
        "}\n";
}

RaytracedGeometryBase::RaytracedGeometryBase (VkGeometryTypeKHR geometryType, VkFormat vertexFormat, VkIndexType indexType)
    : m_geometryType       (geometryType)
    , m_vertexFormat       (vertexFormat)
    , m_indexType          (indexType)
    , m_geometryFlags      ((VkGeometryFlagsKHR)0u)
    , m_hasOpacityMicromap (false)
{
    if (m_geometryType == VK_GEOMETRY_TYPE_AABBS_KHR)
        DE_ASSERT(m_vertexFormat == VK_FORMAT_R32G32B32_SFLOAT);
}

RaytracedGeometryBase::~RaytracedGeometryBase ()
{
}

struct GeometryBuilderParams
{
    VkGeometryTypeKHR geometryType;
    bool              usePadding;
};

template <typename V, typename I>
RaytracedGeometryBase* buildRaytracedGeometry (const GeometryBuilderParams& params)
{
    return new RaytracedGeometry<V, I>(params.geometryType, (params.usePadding ? 1u : 0u));
}

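// Instantiates a RaytracedGeometry for the requested vertex format / index type
// pair. Example (sketch), mirroring the call made later by
// BottomLevelAccelerationStructure::addGeometry():
//
//     de::SharedPtr<RaytracedGeometryBase> geometry =
//         makeRaytracedGeometry(VK_GEOMETRY_TYPE_TRIANGLES_KHR, VK_FORMAT_R32G32B32_SFLOAT, VK_INDEX_TYPE_NONE_KHR);
//     geometry->addVertex(tcu::Vec3(0.0f, 0.0f, 0.0f));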
de::SharedPtr<RaytracedGeometryBase> makeRaytracedGeometry (VkGeometryTypeKHR geometryType, VkFormat vertexFormat, VkIndexType indexType, bool padVertices)
{
    const GeometryBuilderParams builderParams { geometryType, padVertices };

    switch (vertexFormat)
    {
        case VK_FORMAT_R32G32_SFLOAT:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR: return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, EmptyIndex>(builderParams));
                default:                     TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R32G32B32_SFLOAT:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR: return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, EmptyIndex>(builderParams));
                default:                     TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R32G32B32A32_SFLOAT:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR: return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, EmptyIndex>(builderParams));
                default:                     TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R16G16_SFLOAT:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR: return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, EmptyIndex>(builderParams));
                default:                     TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R16G16B16_SFLOAT:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR: return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, EmptyIndex>(builderParams));
                default:                     TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R16G16B16A16_SFLOAT:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR: return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, EmptyIndex>(builderParams));
                default:                     TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R16G16_SNORM:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR: return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, EmptyIndex>(builderParams));
                default:                     TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R16G16B16_SNORM:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR: return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, EmptyIndex>(builderParams));
                default:                     TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R16G16B16A16_SNORM:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR: return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, EmptyIndex>(builderParams));
                default:                     TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R64G64_SFLOAT:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR: return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, EmptyIndex>(builderParams));
                default:                     TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R64G64B64_SFLOAT:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR: return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, EmptyIndex>(builderParams));
                default:                     TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R64G64B64A64_SFLOAT:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR: return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, EmptyIndex>(builderParams));
                default:                     TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R8G8_SNORM:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR: return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, EmptyIndex>(builderParams));
                default:                     TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R8G8B8_SNORM:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR: return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, EmptyIndex>(builderParams));
                default:                     TCU_THROW(InternalError, "Wrong index type");
            }
        case VK_FORMAT_R8G8B8A8_SNORM:
            switch (indexType)
            {
                case VK_INDEX_TYPE_UINT16:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, deUint16>(builderParams));
                case VK_INDEX_TYPE_UINT32:   return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, deUint32>(builderParams));
                case VK_INDEX_TYPE_NONE_KHR: return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, EmptyIndex>(builderParams));
                default:                     TCU_THROW(InternalError, "Wrong index type");
            }
        default:
            TCU_THROW(InternalError, "Wrong vertex format");
    }
}

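// Returns the device address of 'buffer' plus 'offset', or 0 for a null buffer.
// The buffer is expected to have been created with
// VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT and bound to memory allocated with
// VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT (MemoryRequirement::DeviceAddress).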
VkDeviceAddress getBufferDeviceAddress (const DeviceInterface& vk,
                                        const VkDevice device,
                                        const VkBuffer buffer,
                                        VkDeviceSize offset)
{
    if (buffer == DE_NULL)
        return 0;

    VkBufferDeviceAddressInfo deviceAddressInfo
    {
        VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType sType;
        DE_NULL,                                      // const void*     pNext;
        buffer                                        // VkBuffer        buffer;
    };
    return vk.getBufferDeviceAddress(device, &deviceAddressInfo) + offset;
}

static inline Move<VkQueryPool> makeQueryPool (const DeviceInterface& vk,
                                               const VkDevice device,
                                               const VkQueryType queryType,
                                               deUint32 queryCount)
{
    const VkQueryPoolCreateInfo queryPoolCreateInfo =
    {
        VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, // sType
        DE_NULL,                                  // pNext
        (VkQueryPoolCreateFlags)0,                // flags
        queryType,                                // queryType
        queryCount,                               // queryCount
        0u,                                       // pipelineStatistics
    };
    return createQueryPool(vk, device, &queryPoolCreateInfo);
}

static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureGeometryDataKHR (const VkAccelerationStructureGeometryTrianglesDataKHR& triangles)
{
    VkAccelerationStructureGeometryDataKHR result;

    deMemset(&result, 0, sizeof(result));

    result.triangles = triangles;

    return result;
}

static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureGeometryDataKHR (const VkAccelerationStructureGeometryAabbsDataKHR& aabbs)
{
    VkAccelerationStructureGeometryDataKHR result;

    deMemset(&result, 0, sizeof(result));

    result.aabbs = aabbs;

    return result;
}

static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureInstancesDataKHR (const VkAccelerationStructureGeometryInstancesDataKHR& instances)
{
    VkAccelerationStructureGeometryDataKHR result;

    deMemset(&result, 0, sizeof(result));

    result.instances = instances;

    return result;
}

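// VkAccelerationStructureInstanceKHR packs instanceCustomIndex and
// instanceShaderBindingTableRecordOffset into 24-bit bitfields and mask and
// flags into 8-bit bitfields, hence the masking below.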
static inline VkAccelerationStructureInstanceKHR makeVkAccelerationStructureInstanceKHR (const VkTransformMatrixKHR& transform,
                                                                                         deUint32 instanceCustomIndex,
                                                                                         deUint32 mask,
                                                                                         deUint32 instanceShaderBindingTableRecordOffset,
                                                                                         VkGeometryInstanceFlagsKHR flags,
                                                                                         deUint64 accelerationStructureReference)
{
    VkAccelerationStructureInstanceKHR instance = { transform, 0, 0, 0, 0, accelerationStructureReference };
    instance.instanceCustomIndex = instanceCustomIndex & 0xFFFFFF;
    instance.mask = mask & 0xFF;
    instance.instanceShaderBindingTableRecordOffset = instanceShaderBindingTableRecordOffset & 0xFFFFFF;
    instance.flags = flags & 0xFF;
    return instance;
}

VkResult getRayTracingShaderGroupHandlesKHR (const DeviceInterface& vk,
                                             const VkDevice device,
                                             const VkPipeline pipeline,
                                             const deUint32 firstGroup,
                                             const deUint32 groupCount,
                                             const deUintptr dataSize,
                                             void* pData)
{
    return vk.getRayTracingShaderGroupHandlesKHR(device, pipeline, firstGroup, groupCount, dataSize, pData);
}

VkResult getRayTracingShaderGroupHandles (const DeviceInterface& vk,
                                          const VkDevice device,
                                          const VkPipeline pipeline,
                                          const deUint32 firstGroup,
                                          const deUint32 groupCount,
                                          const deUintptr dataSize,
                                          void* pData)
{
    return getRayTracingShaderGroupHandlesKHR(vk, device, pipeline, firstGroup, groupCount, dataSize, pData);
}

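// Joins the calling thread to the deferred operation until it completes.
// VK_THREAD_IDLE_KHR means the implementation currently has no work for this
// thread, and VK_THREAD_DONE_KHR means other joined threads will finish the
// operation; in both cases the final status comes from
// vkGetDeferredOperationResultKHR once the operation is complete.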
VkResult finishDeferredOperation (const DeviceInterface& vk,
                                  VkDevice device,
                                  VkDeferredOperationKHR deferredOperation)
{
    VkResult result = vk.deferredOperationJoinKHR(device, deferredOperation);

    while (result == VK_THREAD_IDLE_KHR)
    {
        std::this_thread::yield();
        result = vk.deferredOperationJoinKHR(device, deferredOperation);
    }

    switch (result)
    {
        case VK_SUCCESS:
        {
            // The deferred operation has finished; query its result.
            result = vk.getDeferredOperationResultKHR(device, deferredOperation);
            break;
        }

        case VK_THREAD_DONE_KHR:
        {
            // The deferred operation is being wrapped up by another thread;
            // wait for that thread to finish.
            do
            {
                std::this_thread::yield();
                result = vk.getDeferredOperationResultKHR(device, deferredOperation);
            } while (result == VK_NOT_READY);
            break;
        }

        default:
        {
            DE_ASSERT(false);
            break;
        }
    }

    return result;
}

void finishDeferredOperationThreaded (DeferredThreadParams* deferredThreadParams)
{
    deferredThreadParams->result = finishDeferredOperation(deferredThreadParams->vk, deferredThreadParams->device, deferredThreadParams->deferredOperation);
}

void finishDeferredOperation (const DeviceInterface& vk,
                              VkDevice device,
                              VkDeferredOperationKHR deferredOperation,
                              const deUint32 workerThreadCount,
                              const bool operationNotDeferred)
{
    if (operationNotDeferred)
    {
        // When the deferral request returns VK_OPERATION_NOT_DEFERRED_KHR,
        // the deferred operation acts as if no command was deferred.
        VK_CHECK(vk.getDeferredOperationResultKHR(device, deferredOperation));

        // There is no need to join any threads to the deferred operation,
        // so the code below can be skipped.
        return;
    }

    if (workerThreadCount == 0)
    {
        VK_CHECK(finishDeferredOperation(vk, device, deferredOperation));
    }
    else
    {
        const deUint32 maxThreadCountSupported = deMinu32(256u, vk.getDeferredOperationMaxConcurrencyKHR(device, deferredOperation));
        const deUint32 requestedThreadCount = workerThreadCount;
        const deUint32 testThreadCount = requestedThreadCount == std::numeric_limits<deUint32>::max() ? maxThreadCountSupported : requestedThreadCount;

        if (maxThreadCountSupported == 0)
            TCU_FAIL("vkGetDeferredOperationMaxConcurrencyKHR must not return 0");

        const DeferredThreadParams deferredThreadParams =
        {
            vk,                 // const DeviceInterface& vk;
            device,             // VkDevice               device;
            deferredOperation,  // VkDeferredOperationKHR deferredOperation;
            VK_RESULT_MAX_ENUM, // VkResult               result;
        };
        std::vector<DeferredThreadParams> threadParams (testThreadCount, deferredThreadParams);
        std::vector<de::MovePtr<std::thread> > threads (testThreadCount);
        bool executionResult = false;

        DE_ASSERT(threads.size() > 0 && threads.size() == testThreadCount);

        for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
            threads[threadNdx] = de::MovePtr<std::thread>(new std::thread(finishDeferredOperationThreaded, &threadParams[threadNdx]));

        for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
            threads[threadNdx]->join();

        for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
            if (threadParams[threadNdx].result == VK_SUCCESS)
                executionResult = true;

        if (!executionResult)
            TCU_FAIL("No worker thread reported VK_SUCCESS");
    }
}
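
// Example (sketch): finishing a host-side build that may have been deferred,
// mirroring the usage in BottomLevelAccelerationStructureKHR::build():
//
//     VkResult result = vk.buildAccelerationStructuresKHR(device, deferredOperation, 1u, &buildInfo, &rangeInfoPtr);
//     finishDeferredOperation(vk, device, deferredOperation, workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);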

SerialStorage::SerialStorage (const DeviceInterface& vk,
                              const VkDevice device,
                              Allocator& allocator,
                              const VkAccelerationStructureBuildTypeKHR buildType,
                              const VkDeviceSize storageSize)
    : m_buildType   (buildType)
    , m_storageSize (storageSize)
    , m_serialInfo  ()
{
    const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(storageSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
    try
    {
        m_buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
    }
    catch (const tcu::NotSupportedError&)
    {
        // Retry without the Cached flag.
        m_buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
    }
}

SerialStorage::SerialStorage (const DeviceInterface& vk,
                              const VkDevice device,
                              Allocator& allocator,
                              const VkAccelerationStructureBuildTypeKHR buildType,
                              const SerialInfo& serialInfo)
    : m_buildType   (buildType)
    , m_storageSize (serialInfo.sizes()[0]) // raises an assertion if serialInfo is empty
    , m_serialInfo  (serialInfo)
{
    DE_ASSERT(serialInfo.sizes().size() >= 2u);

    // Create the buffer for the top-level acceleration structure.
    {
        const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(m_storageSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
        m_buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
    }

    // Create the buffers for the bottom-level acceleration structures,
    // deduplicating repeated addresses so each unique BLAS gets one storage.
    {
        std::vector<deUint64> addrs;

        for (std::size_t i = 1; i < serialInfo.addresses().size(); ++i)
        {
            const deUint64& lookAddr = serialInfo.addresses()[i];
            if (std::find(addrs.begin(), addrs.end(), lookAddr) == addrs.end())
            {
                addrs.emplace_back(lookAddr);
                m_bottoms.emplace_back(de::SharedPtr<SerialStorage>(new SerialStorage(vk, device, allocator, buildType, serialInfo.sizes()[i])));
            }
        }
    }
}

VkDeviceOrHostAddressKHR SerialStorage::getAddress (const DeviceInterface& vk,
                                                    const VkDevice device,
                                                    const VkAccelerationStructureBuildTypeKHR buildType)
{
    if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
        return makeDeviceOrHostAddressKHR(vk, device, m_buffer->get(), 0);
    else
        return makeDeviceOrHostAddressKHR(m_buffer->getAllocation().getHostPtr());
}

SerialStorage::AccelerationStructureHeader* SerialStorage::getASHeader ()
{
    return reinterpret_cast<AccelerationStructureHeader*>(getHostAddress().hostAddress);
}

bool SerialStorage::hasDeepFormat () const
{
    return (m_serialInfo.sizes().size() >= 2u);
}

de::SharedPtr<SerialStorage> SerialStorage::getBottomStorage (deUint32 index) const
{
    return m_bottoms[index];
}

VkDeviceOrHostAddressKHR SerialStorage::getHostAddress (VkDeviceSize offset)
{
    DE_ASSERT(offset < m_storageSize);
    return makeDeviceOrHostAddressKHR(static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr()) + offset);
}

VkDeviceOrHostAddressConstKHR SerialStorage::getHostAddressConst (VkDeviceSize offset)
{
    return makeDeviceOrHostAddressConstKHR(static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr()) + offset);
}

VkDeviceOrHostAddressConstKHR SerialStorage::getAddressConst (const DeviceInterface& vk,
                                                              const VkDevice device,
                                                              const VkAccelerationStructureBuildTypeKHR buildType)
{
    if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
        return makeDeviceOrHostAddressConstKHR(vk, device, m_buffer->get(), 0);
    else
        return getHostAddressConst();
}

inline VkDeviceSize SerialStorage::getStorageSize () const
{
    return m_storageSize;
}

inline const SerialInfo& SerialStorage::getSerialInfo () const
{
    return m_serialInfo;
}

deUint64 SerialStorage::getDeserializedSize ()
{
    deUint64 result = 0;
    const deUint8* startPtr = static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr());

    DE_ASSERT(sizeof(result) == DESERIALIZED_SIZE_SIZE);

    deMemcpy(&result, startPtr + DESERIALIZED_SIZE_OFFSET, sizeof(result));

    return result;
}

BottomLevelAccelerationStructure::~BottomLevelAccelerationStructure ()
{
}

BottomLevelAccelerationStructure::BottomLevelAccelerationStructure ()
    : m_structureSize     (0u)
    , m_updateScratchSize (0u)
    , m_buildScratchSize  (0u)
{
}

void BottomLevelAccelerationStructure::setGeometryData (const std::vector<tcu::Vec3>& geometryData,
                                                        const bool triangles,
                                                        const VkGeometryFlagsKHR geometryFlags)
{
    if (triangles)
        DE_ASSERT((geometryData.size() % 3) == 0);
    else
        DE_ASSERT((geometryData.size() % 2) == 0);

    setGeometryCount(1u);

    addGeometry(geometryData, triangles, geometryFlags);
}

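// Provides canonical geometry for a given test stage: a quad made of two
// triangles (or a single AABB for the intersection stage) covering
// [-1,1]x[-1,1] at depth z. The miss stage places the geometry at z = -9.9,
// beyond the common ray generation shader's tmax of 9.0, so every ray misses.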
void BottomLevelAccelerationStructure::setDefaultGeometryData (const VkShaderStageFlagBits testStage,
                                                               const VkGeometryFlagsKHR geometryFlags)
{
    bool trianglesData = false;
    float z = 0.0f;
    std::vector<tcu::Vec3> geometryData;

    switch (testStage)
    {
        case VK_SHADER_STAGE_RAYGEN_BIT_KHR:       z = -1.0f; trianglesData = true;  break;
        case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:      z = -1.0f; trianglesData = true;  break;
        case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:  z = -1.0f; trianglesData = true;  break;
        case VK_SHADER_STAGE_MISS_BIT_KHR:         z = -9.9f; trianglesData = true;  break;
        case VK_SHADER_STAGE_INTERSECTION_BIT_KHR: z = -1.0f; trianglesData = false; break;
        case VK_SHADER_STAGE_CALLABLE_BIT_KHR:     z = -1.0f; trianglesData = true;  break;
        default: TCU_THROW(InternalError, "Unacceptable stage");
    }

    if (trianglesData)
    {
        geometryData.reserve(6);

        geometryData.push_back(tcu::Vec3(-1.0f, -1.0f, z));
        geometryData.push_back(tcu::Vec3(-1.0f, +1.0f, z));
        geometryData.push_back(tcu::Vec3(+1.0f, -1.0f, z));
        geometryData.push_back(tcu::Vec3(-1.0f, +1.0f, z));
        geometryData.push_back(tcu::Vec3(+1.0f, -1.0f, z));
        geometryData.push_back(tcu::Vec3(+1.0f, +1.0f, z));
    }
    else
    {
        geometryData.reserve(2);

        geometryData.push_back(tcu::Vec3(-1.0f, -1.0f, z));
        geometryData.push_back(tcu::Vec3(+1.0f, +1.0f, z));
    }

    setGeometryCount(1u);

    addGeometry(geometryData, trianglesData, geometryFlags);
}

void BottomLevelAccelerationStructure::setGeometryCount (const size_t geometryCount)
{
    m_geometriesData.clear();

    m_geometriesData.reserve(geometryCount);
}

void BottomLevelAccelerationStructure::addGeometry (de::SharedPtr<RaytracedGeometryBase>& raytracedGeometry)
{
    m_geometriesData.push_back(raytracedGeometry);
}

void BottomLevelAccelerationStructure::addGeometry (const std::vector<tcu::Vec3>& geometryData,
                                                    const bool triangles,
                                                    const VkGeometryFlagsKHR geometryFlags,
                                                    const VkAccelerationStructureTrianglesOpacityMicromapEXT* opacityGeometryMicromap)
{
    DE_ASSERT(geometryData.size() > 0);
    DE_ASSERT((triangles && geometryData.size() % 3 == 0) || (!triangles && geometryData.size() % 2 == 0));

    if (!triangles)
        for (size_t posNdx = 0; posNdx < geometryData.size() / 2; ++posNdx)
        {
            DE_ASSERT(geometryData[2 * posNdx].x() <= geometryData[2 * posNdx + 1].x());
            DE_ASSERT(geometryData[2 * posNdx].y() <= geometryData[2 * posNdx + 1].y());
            DE_ASSERT(geometryData[2 * posNdx].z() <= geometryData[2 * posNdx + 1].z());
        }

    de::SharedPtr<RaytracedGeometryBase> geometry = makeRaytracedGeometry(triangles ? VK_GEOMETRY_TYPE_TRIANGLES_KHR : VK_GEOMETRY_TYPE_AABBS_KHR, VK_FORMAT_R32G32B32_SFLOAT, VK_INDEX_TYPE_NONE_KHR);
    for (auto it = begin(geometryData), eit = end(geometryData); it != eit; ++it)
        geometry->addVertex(*it);

    geometry->setGeometryFlags(geometryFlags);
    if (opacityGeometryMicromap)
        geometry->setOpacityMicromap(opacityGeometryMicromap);
    addGeometry(geometry);
}

VkAccelerationStructureBuildSizesInfoKHR BottomLevelAccelerationStructure::getStructureBuildSizes () const
{
    return
    {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, // VkStructureType sType;
        DE_NULL,                                                       // const void*     pNext;
        m_structureSize,                                               // VkDeviceSize    accelerationStructureSize;
        m_updateScratchSize,                                           // VkDeviceSize    updateScratchSize;
        m_buildScratchSize                                             // VkDeviceSize    buildScratchSize;
    };
}

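// Vertex and index data for all geometries share a single buffer; each
// geometry's block is aligned to 8 bytes, so the size computations below must
// stay in sync with the offsets used by updateVertexBuffer() and
// updateIndexBuffer().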
VkDeviceSize getVertexBufferSize (const std::vector<de::SharedPtr<RaytracedGeometryBase>>& geometriesData)
{
    DE_ASSERT(geometriesData.size() != 0);
    VkDeviceSize bufferSizeBytes = 0;
    for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
        bufferSizeBytes += deAlignSize(geometriesData[geometryNdx]->getVertexByteSize(), 8);
    return bufferSizeBytes;
}

BufferWithMemory* createVertexBuffer (const DeviceInterface& vk,
                                      const VkDevice device,
                                      Allocator& allocator,
                                      const VkDeviceSize bufferSizeBytes)
{
    const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
    return new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
}

BufferWithMemory* createVertexBuffer (const DeviceInterface& vk,
                                      const VkDevice device,
                                      Allocator& allocator,
                                      const std::vector<de::SharedPtr<RaytracedGeometryBase>>& geometriesData)
{
    return createVertexBuffer(vk, device, allocator, getVertexBufferSize(geometriesData));
}

void updateVertexBuffer (const DeviceInterface& vk,
                         const VkDevice device,
                         const std::vector<de::SharedPtr<RaytracedGeometryBase>>& geometriesData,
                         BufferWithMemory* vertexBuffer,
                         VkDeviceSize geometriesOffset = 0)
{
    const Allocation& geometryAlloc = vertexBuffer->getAllocation();
    deUint8* bufferStart = static_cast<deUint8*>(geometryAlloc.getHostPtr());
    VkDeviceSize bufferOffset = geometriesOffset;

    for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
    {
        const void* geometryPtr = geometriesData[geometryNdx]->getVertexPointer();
        const size_t geometryPtrSize = geometriesData[geometryNdx]->getVertexByteSize();

        deMemcpy(&bufferStart[bufferOffset], geometryPtr, geometryPtrSize);

        bufferOffset += deAlignSize(geometryPtrSize, 8);
    }

    // Flush the whole allocation. We could flush only the interesting range, but we'd need to be sure both the offset and size
    // align to VkPhysicalDeviceLimits::nonCoherentAtomSize, which we are not considering. Also note most code uses Coherent memory
    // for the vertex and index buffers, so flushing is actually not needed.
    flushAlloc(vk, device, geometryAlloc);
}

VkDeviceSize getIndexBufferSize (const std::vector<de::SharedPtr<RaytracedGeometryBase>>& geometriesData)
{
    DE_ASSERT(!geometriesData.empty());

    VkDeviceSize bufferSizeBytes = 0;
    for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
        if (geometriesData[geometryNdx]->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
            bufferSizeBytes += deAlignSize(geometriesData[geometryNdx]->getIndexByteSize(), 8);
    return bufferSizeBytes;
}

BufferWithMemory* createIndexBuffer (const DeviceInterface& vk,
                                     const VkDevice device,
                                     Allocator& allocator,
                                     const VkDeviceSize bufferSizeBytes)
{
    DE_ASSERT(bufferSizeBytes);
    const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
    return new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
}

BufferWithMemory* createIndexBuffer (const DeviceInterface& vk,
                                     const VkDevice device,
                                     Allocator& allocator,
                                     const std::vector<de::SharedPtr<RaytracedGeometryBase>>& geometriesData)
{
    const VkDeviceSize bufferSizeBytes = getIndexBufferSize(geometriesData);
    return bufferSizeBytes ? createIndexBuffer(vk, device, allocator, bufferSizeBytes) : nullptr;
}

void updateIndexBuffer (const DeviceInterface& vk,
                        const VkDevice device,
                        const std::vector<de::SharedPtr<RaytracedGeometryBase>>& geometriesData,
                        BufferWithMemory* indexBuffer,
                        VkDeviceSize geometriesOffset)
{
    const Allocation& indexAlloc = indexBuffer->getAllocation();
    deUint8* bufferStart = static_cast<deUint8*>(indexAlloc.getHostPtr());
    VkDeviceSize bufferOffset = geometriesOffset;

    for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
    {
        if (geometriesData[geometryNdx]->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
        {
            const void* indexPtr = geometriesData[geometryNdx]->getIndexPointer();
            const size_t indexPtrSize = geometriesData[geometryNdx]->getIndexByteSize();

            deMemcpy(&bufferStart[bufferOffset], indexPtr, indexPtrSize);

            bufferOffset += deAlignSize(indexPtrSize, 8);
        }
    }

    // Flush the whole allocation. We could flush only the interesting range, but we'd need to be sure both the offset and size
    // align to VkPhysicalDeviceLimits::nonCoherentAtomSize, which we are not considering. Also note most code uses Coherent memory
    // for the vertex and index buffers, so flushing is actually not needed.
    flushAlloc(vk, device, indexAlloc);
}

class BottomLevelAccelerationStructureKHR : public BottomLevelAccelerationStructure
{
public:
    static deUint32 getRequiredAllocationCount (void);

    BottomLevelAccelerationStructureKHR ();
    BottomLevelAccelerationStructureKHR (const BottomLevelAccelerationStructureKHR& other) = delete;
    virtual ~BottomLevelAccelerationStructureKHR ();

    void setBuildType (const VkAccelerationStructureBuildTypeKHR buildType) override;
    VkAccelerationStructureBuildTypeKHR getBuildType () const override;
    void setCreateFlags (const VkAccelerationStructureCreateFlagsKHR createFlags) override;
    void setCreateGeneric (bool createGeneric) override;
    void setBuildFlags (const VkBuildAccelerationStructureFlagsKHR buildFlags) override;
    void setBuildWithoutGeometries (bool buildWithoutGeometries) override;
    void setBuildWithoutPrimitives (bool buildWithoutPrimitives) override;
    void setDeferredOperation (const bool deferredOperation,
                               const deUint32 workerThreadCount) override;
    void setUseArrayOfPointers (const bool useArrayOfPointers) override;
    void setIndirectBuildParameters (const VkBuffer indirectBuffer,
                                     const VkDeviceSize indirectBufferOffset,
                                     const deUint32 indirectBufferStride) override;
    VkBuildAccelerationStructureFlagsKHR getBuildFlags () const override;

    void create (const DeviceInterface& vk,
                 const VkDevice device,
                 Allocator& allocator,
                 VkDeviceSize structureSize,
                 VkDeviceAddress deviceAddress = 0u,
                 const void* pNext = DE_NULL,
                 const MemoryRequirement& addMemoryRequirement = MemoryRequirement::Any) override;
    void build (const DeviceInterface& vk,
                const VkDevice device,
                const VkCommandBuffer cmdBuffer) override;
    void copyFrom (const DeviceInterface& vk,
                   const VkDevice device,
                   const VkCommandBuffer cmdBuffer,
                   BottomLevelAccelerationStructure* accelerationStructure,
                   bool compactCopy) override;

    void serialize (const DeviceInterface& vk,
                    const VkDevice device,
                    const VkCommandBuffer cmdBuffer,
                    SerialStorage* storage) override;
    void deserialize (const DeviceInterface& vk,
                      const VkDevice device,
                      const VkCommandBuffer cmdBuffer,
                      SerialStorage* storage) override;

    const VkAccelerationStructureKHR* getPtr (void) const override;

protected:
    VkAccelerationStructureBuildTypeKHR  m_buildType;
    VkAccelerationStructureCreateFlagsKHR m_createFlags;
    bool                                  m_createGeneric;
    VkBuildAccelerationStructureFlagsKHR  m_buildFlags;
    bool                                  m_buildWithoutGeometries;
    bool                                  m_buildWithoutPrimitives;
    bool                                  m_deferredOperation;
    deUint32                              m_workerThreadCount;
    bool                                  m_useArrayOfPointers;
    de::MovePtr<BufferWithMemory>         m_accelerationStructureBuffer;
    de::MovePtr<BufferWithMemory>         m_vertexBuffer;
    de::MovePtr<BufferWithMemory>         m_indexBuffer;
    de::MovePtr<BufferWithMemory>         m_deviceScratchBuffer;
    de::UniquePtr<std::vector<deUint8>>   m_hostScratchBuffer;
    Move<VkAccelerationStructureKHR>      m_accelerationStructureKHR;
    VkBuffer                              m_indirectBuffer;
    VkDeviceSize                          m_indirectBufferOffset;
    deUint32                              m_indirectBufferStride;

    void prepareGeometries (const DeviceInterface& vk,
                            const VkDevice device,
                            std::vector<VkAccelerationStructureGeometryKHR>& accelerationStructureGeometriesKHR,
                            std::vector<VkAccelerationStructureGeometryKHR*>& accelerationStructureGeometriesKHRPointers,
                            std::vector<VkAccelerationStructureBuildRangeInfoKHR>& accelerationStructureBuildRangeInfoKHR,
                            std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT>& accelerationStructureGeometryMicromapsEXT,
                            std::vector<deUint32>& maxPrimitiveCounts,
                            VkDeviceSize vertexBufferOffset = 0,
                            VkDeviceSize indexBufferOffset = 0) const;

    virtual BufferWithMemory*      getAccelerationStructureBuffer () const { return m_accelerationStructureBuffer.get(); }
    virtual BufferWithMemory*      getDeviceScratchBuffer () const { return m_deviceScratchBuffer.get(); }
    virtual std::vector<deUint8>*  getHostScratchBuffer () const { return m_hostScratchBuffer.get(); }
    virtual BufferWithMemory*      getVertexBuffer () const { return m_vertexBuffer.get(); }
    virtual BufferWithMemory*      getIndexBuffer () const { return m_indexBuffer.get(); }

    virtual VkDeviceSize getAccelerationStructureBufferOffset () const { return 0; }
    virtual VkDeviceSize getDeviceScratchBufferOffset () const { return 0; }
    virtual VkDeviceSize getVertexBufferOffset () const { return 0; }
    virtual VkDeviceSize getIndexBufferOffset () const { return 0; }
};

deUint32 BottomLevelAccelerationStructureKHR::getRequiredAllocationCount (void)
{
    /*
        de::MovePtr<BufferWithMemory> m_geometryBuffer; // but only when m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR
        de::MovePtr<Allocation>       m_accelerationStructureAlloc;
        de::MovePtr<BufferWithMemory> m_deviceScratchBuffer;
    */
    return 3u;
}

BottomLevelAccelerationStructureKHR::~BottomLevelAccelerationStructureKHR ()
{
}

BottomLevelAccelerationStructureKHR::BottomLevelAccelerationStructureKHR ()
    : BottomLevelAccelerationStructure ()
    , m_buildType                   (VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    , m_createFlags                 (0u)
    , m_createGeneric               (false)
    , m_buildFlags                  (0u)
    , m_buildWithoutGeometries      (false)
    , m_buildWithoutPrimitives      (false)
    , m_deferredOperation           (false)
    , m_workerThreadCount           (0)
    , m_useArrayOfPointers          (false)
    , m_accelerationStructureBuffer (DE_NULL)
    , m_vertexBuffer                (DE_NULL)
    , m_indexBuffer                 (DE_NULL)
    , m_deviceScratchBuffer         (DE_NULL)
    , m_hostScratchBuffer           (new std::vector<deUint8>)
    , m_accelerationStructureKHR    ()
    , m_indirectBuffer              (DE_NULL)
    , m_indirectBufferOffset        (0)
    , m_indirectBufferStride        (0)
{
}

void BottomLevelAccelerationStructureKHR::setBuildType (const VkAccelerationStructureBuildTypeKHR buildType)
{
    m_buildType = buildType;
}

VkAccelerationStructureBuildTypeKHR BottomLevelAccelerationStructureKHR::getBuildType () const
{
    return m_buildType;
}

void BottomLevelAccelerationStructureKHR::setCreateFlags (const VkAccelerationStructureCreateFlagsKHR createFlags)
{
    m_createFlags = createFlags;
}

void BottomLevelAccelerationStructureKHR::setCreateGeneric (bool createGeneric)
{
    m_createGeneric = createGeneric;
}

void BottomLevelAccelerationStructureKHR::setBuildFlags (const VkBuildAccelerationStructureFlagsKHR buildFlags)
{
    m_buildFlags = buildFlags;
}

void BottomLevelAccelerationStructureKHR::setBuildWithoutGeometries (bool buildWithoutGeometries)
{
    m_buildWithoutGeometries = buildWithoutGeometries;
}

void BottomLevelAccelerationStructureKHR::setBuildWithoutPrimitives (bool buildWithoutPrimitives)
{
    m_buildWithoutPrimitives = buildWithoutPrimitives;
}

void BottomLevelAccelerationStructureKHR::setDeferredOperation (const bool deferredOperation,
                                                                const deUint32 workerThreadCount)
{
    m_deferredOperation = deferredOperation;
    m_workerThreadCount = workerThreadCount;
}

void BottomLevelAccelerationStructureKHR::setUseArrayOfPointers (const bool useArrayOfPointers)
{
    m_useArrayOfPointers = useArrayOfPointers;
}

void BottomLevelAccelerationStructureKHR::setIndirectBuildParameters (const VkBuffer indirectBuffer,
                                                                      const VkDeviceSize indirectBufferOffset,
                                                                      const deUint32 indirectBufferStride)
{
    m_indirectBuffer = indirectBuffer;
    m_indirectBufferOffset = indirectBufferOffset;
    m_indirectBufferStride = indirectBufferStride;
}

VkBuildAccelerationStructureFlagsKHR BottomLevelAccelerationStructureKHR::getBuildFlags () const
{
    return m_buildFlags;
}

void BottomLevelAccelerationStructureKHR::create (const DeviceInterface& vk,
                                                  const VkDevice device,
                                                  Allocator& allocator,
                                                  VkDeviceSize structureSize,
                                                  VkDeviceAddress deviceAddress,
                                                  const void* pNext,
                                                  const MemoryRequirement& addMemoryRequirement)
{
    // The AS may be built from geometries using vkCmdBuildAccelerationStructuresKHR / vkBuildAccelerationStructuresKHR,
    // or it may be copied/compacted/deserialized from another AS (in that case it needs no geometries, but it must know its size before creation).
    DE_ASSERT(!m_geometriesData.empty() != !(structureSize == 0)); // logical xor

    if (structureSize == 0)
    {
        std::vector<VkAccelerationStructureGeometryKHR> accelerationStructureGeometriesKHR;
        std::vector<VkAccelerationStructureGeometryKHR*> accelerationStructureGeometriesKHRPointers;
        std::vector<VkAccelerationStructureBuildRangeInfoKHR> accelerationStructureBuildRangeInfoKHR;
        std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT> accelerationStructureGeometryMicromapsEXT;
        std::vector<deUint32> maxPrimitiveCounts;
        prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers, accelerationStructureBuildRangeInfoKHR, accelerationStructureGeometryMicromapsEXT, maxPrimitiveCounts);

        const VkAccelerationStructureGeometryKHR* accelerationStructureGeometriesKHRPointer = accelerationStructureGeometriesKHR.data();
        const VkAccelerationStructureGeometryKHR* const* accelerationStructureGeometry = accelerationStructureGeometriesKHRPointers.data();

        VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR =
        {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,           // VkStructureType sType;
            DE_NULL,                                                                    // const void* pNext;
            VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,                            // VkAccelerationStructureTypeKHR type;
            m_buildFlags,                                                               // VkBuildAccelerationStructureFlagsKHR flags;
            VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,                             // VkBuildAccelerationStructureModeKHR mode;
            DE_NULL,                                                                    // VkAccelerationStructureKHR srcAccelerationStructure;
            DE_NULL,                                                                    // VkAccelerationStructureKHR dstAccelerationStructure;
            static_cast<deUint32>(accelerationStructureGeometriesKHR.size()),           // deUint32 geometryCount;
            m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer, // const VkAccelerationStructureGeometryKHR* pGeometries;
            m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL,             // const VkAccelerationStructureGeometryKHR* const* ppGeometries;
            makeDeviceOrHostAddressKHR(DE_NULL)                                         // VkDeviceOrHostAddressKHR scratchData;
        };
        VkAccelerationStructureBuildSizesInfoKHR sizeInfo =
        {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, // VkStructureType sType;
            DE_NULL,                                                       // const void* pNext;
            0,                                                             // VkDeviceSize accelerationStructureSize;
            0,                                                             // VkDeviceSize updateScratchSize;
            0                                                              // VkDeviceSize buildScratchSize;
        };

        vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);

        m_structureSize = sizeInfo.accelerationStructureSize;
        m_updateScratchSize = sizeInfo.updateScratchSize;
        m_buildScratchSize = sizeInfo.buildScratchSize;
    }
    else
    {
        m_structureSize = structureSize;
        m_updateScratchSize = 0u;
        m_buildScratchSize = 0u;
    }

    {
        const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(m_structureSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
        const MemoryRequirement memoryRequirement = addMemoryRequirement | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress;

        try
        {
            m_accelerationStructureBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | memoryRequirement));
        }
        catch (const tcu::NotSupportedError&)
        {
            // Retry without the Cached flag.
            m_accelerationStructureBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, memoryRequirement));
        }
    }

    {
        const VkAccelerationStructureTypeKHR structureType = (m_createGeneric
                                                              ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
                                                              : VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
        const VkAccelerationStructureCreateInfoKHR accelerationStructureCreateInfoKHR
        {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR, // VkStructureType sType;
            pNext,                                                    // const void* pNext;
            m_createFlags,                                            // VkAccelerationStructureCreateFlagsKHR createFlags;
            getAccelerationStructureBuffer()->get(),                  // VkBuffer buffer;
            getAccelerationStructureBufferOffset(),                   // VkDeviceSize offset;
            m_structureSize,                                          // VkDeviceSize size;
            structureType,                                            // VkAccelerationStructureTypeKHR type;
            deviceAddress                                             // VkDeviceAddress deviceAddress;
        };

        m_accelerationStructureKHR = createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);
    }

    if (m_buildScratchSize > 0u)
    {
        if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
        {
            const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(m_buildScratchSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
            m_deviceScratchBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
        }
        else
        {
            m_hostScratchBuffer->resize(static_cast<size_t>(m_buildScratchSize));
        }
    }

    if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR && !m_geometriesData.empty())
    {
        m_vertexBuffer = de::MovePtr<BufferWithMemory>(createVertexBuffer(vk, device, allocator, m_geometriesData));
        m_indexBuffer = de::MovePtr<BufferWithMemory>(createIndexBuffer(vk, device, allocator, m_geometriesData));
    }
}

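// Builds the acceleration structure either on the device (recording into
// cmdBuffer) or on the host, depending on m_buildType. Device builds may
// alternatively be issued indirectly when setIndirectBuildParameters() was
// used; host builds may run as a deferred operation finished by one or more
// worker threads.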
void BottomLevelAccelerationStructureKHR::build (const DeviceInterface& vk,
                                                 const VkDevice device,
                                                 const VkCommandBuffer cmdBuffer)
{
    DE_ASSERT(!m_geometriesData.empty());
    DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
    DE_ASSERT(m_buildScratchSize != 0);

    if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        updateVertexBuffer(vk, device, m_geometriesData, getVertexBuffer(), getVertexBufferOffset());
        if (getIndexBuffer() != DE_NULL)
            updateIndexBuffer(vk, device, m_geometriesData, getIndexBuffer(), getIndexBufferOffset());
    }

    {
        std::vector<VkAccelerationStructureGeometryKHR> accelerationStructureGeometriesKHR;
        std::vector<VkAccelerationStructureGeometryKHR*> accelerationStructureGeometriesKHRPointers;
        std::vector<VkAccelerationStructureBuildRangeInfoKHR> accelerationStructureBuildRangeInfoKHR;
        std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT> accelerationStructureGeometryMicromapsEXT;
        std::vector<deUint32> maxPrimitiveCounts;

        prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers,
                          accelerationStructureBuildRangeInfoKHR, accelerationStructureGeometryMicromapsEXT, maxPrimitiveCounts, getVertexBufferOffset(), getIndexBufferOffset());

        const VkAccelerationStructureGeometryKHR* accelerationStructureGeometriesKHRPointer = accelerationStructureGeometriesKHR.data();
        const VkAccelerationStructureGeometryKHR* const* accelerationStructureGeometry = accelerationStructureGeometriesKHRPointers.data();
        VkDeviceOrHostAddressKHR scratchData = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
                                               ? makeDeviceOrHostAddressKHR(vk, device, getDeviceScratchBuffer()->get(), getDeviceScratchBufferOffset())
                                               : makeDeviceOrHostAddressKHR(getHostScratchBuffer()->data());
        const deUint32 geometryCount = (m_buildWithoutGeometries
                                        ? 0u
                                        : static_cast<deUint32>(accelerationStructureGeometriesKHR.size()));

        VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR =
        {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,           // VkStructureType sType;
            DE_NULL,                                                                    // const void* pNext;
            VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,                            // VkAccelerationStructureTypeKHR type;
            m_buildFlags,                                                               // VkBuildAccelerationStructureFlagsKHR flags;
            VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,                             // VkBuildAccelerationStructureModeKHR mode;
            DE_NULL,                                                                    // VkAccelerationStructureKHR srcAccelerationStructure;
            m_accelerationStructureKHR.get(),                                           // VkAccelerationStructureKHR dstAccelerationStructure;
            geometryCount,                                                              // deUint32 geometryCount;
            m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer, // const VkAccelerationStructureGeometryKHR* pGeometries;
            m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL,             // const VkAccelerationStructureGeometryKHR* const* ppGeometries;
            scratchData                                                                 // VkDeviceOrHostAddressKHR scratchData;
        };

        VkAccelerationStructureBuildRangeInfoKHR* accelerationStructureBuildRangeInfoKHRPtr = accelerationStructureBuildRangeInfoKHR.data();

        if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
        {
            if (m_indirectBuffer == DE_NULL)
                vk.cmdBuildAccelerationStructuresKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
            else
            {
                VkDeviceAddress indirectDeviceAddress = getBufferDeviceAddress(vk, device, m_indirectBuffer, m_indirectBufferOffset);
                deUint32* pMaxPrimitiveCounts = maxPrimitiveCounts.data();
                vk.cmdBuildAccelerationStructuresIndirectKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, &indirectDeviceAddress, &m_indirectBufferStride, &pMaxPrimitiveCounts);
            }
        }
        else if (!m_deferredOperation)
        {
            VK_CHECK(vk.buildAccelerationStructuresKHR(device, DE_NULL, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr));
        }
        else
        {
            const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
            const auto deferredOperation = deferredOperationPtr.get();

            VkResult result = vk.buildAccelerationStructuresKHR(device, deferredOperation, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);

            DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);

            finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
        }
    }

    if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        const VkAccessFlags accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
        const VkMemoryBarrier memBarrier = makeMemoryBarrier(accessMasks, accessMasks);

        cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
1251 }
1252 }
1253
copyFrom(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,BottomLevelAccelerationStructure * accelerationStructure,bool compactCopy)1254 void BottomLevelAccelerationStructureKHR::copyFrom (const DeviceInterface& vk,
1255 const VkDevice device,
1256 const VkCommandBuffer cmdBuffer,
1257 BottomLevelAccelerationStructure* accelerationStructure,
1258 bool compactCopy)
1259 {
1260 DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1261 DE_ASSERT(accelerationStructure != DE_NULL);
1262
1263 VkCopyAccelerationStructureInfoKHR copyAccelerationStructureInfo =
1264 {
1265 VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR, // VkStructureType sType;
1266 DE_NULL, // const void* pNext;
1267 *(accelerationStructure->getPtr()), // VkAccelerationStructureKHR src;
1268 *(getPtr()), // VkAccelerationStructureKHR dst;
1269 compactCopy ? VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR : VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR // VkCopyAccelerationStructureModeKHR mode;
1270 };
1271
1272 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1273 {
1274 vk.cmdCopyAccelerationStructureKHR(cmdBuffer, ©AccelerationStructureInfo);
1275 }
1276 else if (!m_deferredOperation)
1277 {
1278 VK_CHECK(vk.copyAccelerationStructureKHR(device, DE_NULL, ©AccelerationStructureInfo));
1279 }
1280 else
1281 {
1282 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
1283 const auto deferredOperation = deferredOperationPtr.get();
1284
1285 VkResult result = vk.copyAccelerationStructureKHR(device, deferredOperation, ©AccelerationStructureInfo);
1286
1287 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1288
1289 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1290 }
1291
1292 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1293 {
1294 const VkAccessFlags accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
1295 const VkMemoryBarrier memBarrier = makeMemoryBarrier(accessMasks, accessMasks);
1296
1297 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
1298 }
1299 }
1300
serialize(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,SerialStorage * storage)1301 void BottomLevelAccelerationStructureKHR::serialize (const DeviceInterface& vk,
1302 const VkDevice device,
1303 const VkCommandBuffer cmdBuffer,
1304 SerialStorage* storage)
1305 {
1306 DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1307 DE_ASSERT(storage != DE_NULL);
1308
1309 const VkCopyAccelerationStructureToMemoryInfoKHR copyAccelerationStructureInfo =
1310 {
1311 VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_INFO_KHR, // VkStructureType sType;
1312 DE_NULL, // const void* pNext;
1313 *(getPtr()), // VkAccelerationStructureKHR src;
1314 storage->getAddress(vk, device, m_buildType), // VkDeviceOrHostAddressKHR dst;
1315 VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR // VkCopyAccelerationStructureModeKHR mode;
1316 };
1317
1318 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1319 {
1320 vk.cmdCopyAccelerationStructureToMemoryKHR(cmdBuffer, ©AccelerationStructureInfo);
1321 }
1322 else if (!m_deferredOperation)
1323 {
1324 VK_CHECK(vk.copyAccelerationStructureToMemoryKHR(device, DE_NULL, ©AccelerationStructureInfo));
1325 }
1326 else
1327 {
1328 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
1329 const auto deferredOperation = deferredOperationPtr.get();
1330
1331 const VkResult result = vk.copyAccelerationStructureToMemoryKHR(device, deferredOperation, ©AccelerationStructureInfo);
1332
1333 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1334
1335 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1336 }
1337 }
1338
deserialize(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,SerialStorage * storage)1339 void BottomLevelAccelerationStructureKHR::deserialize (const DeviceInterface& vk,
1340 const VkDevice device,
1341 const VkCommandBuffer cmdBuffer,
1342 SerialStorage* storage)
1343 {
1344 DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1345 DE_ASSERT(storage != DE_NULL);
1346
1347 const VkCopyMemoryToAccelerationStructureInfoKHR copyAccelerationStructureInfo =
1348 {
1349 VK_STRUCTURE_TYPE_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_INFO_KHR, // VkStructureType sType;
1350 DE_NULL, // const void* pNext;
1351 storage->getAddressConst(vk, device, m_buildType), // VkDeviceOrHostAddressConstKHR src;
1352 *(getPtr()), // VkAccelerationStructureKHR dst;
1353 VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR // VkCopyAccelerationStructureModeKHR mode;
1354 };
1355
1356 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1357 {
1358 vk.cmdCopyMemoryToAccelerationStructureKHR(cmdBuffer, ©AccelerationStructureInfo);
1359 }
1360 else if (!m_deferredOperation)
1361 {
1362 VK_CHECK(vk.copyMemoryToAccelerationStructureKHR(device, DE_NULL, ©AccelerationStructureInfo));
1363 }
1364 else
1365 {
1366 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
1367 const auto deferredOperation = deferredOperationPtr.get();
1368
1369 const VkResult result = vk.copyMemoryToAccelerationStructureKHR(device, deferredOperation, ©AccelerationStructureInfo);
1370
1371 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1372
1373 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1374 }
1375
1376 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1377 {
1378 const VkAccessFlags accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
1379 const VkMemoryBarrier memBarrier = makeMemoryBarrier(accessMasks, accessMasks);
1380
1381 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
1382 }
1383 }
1384
getPtr(void) const1385 const VkAccelerationStructureKHR* BottomLevelAccelerationStructureKHR::getPtr (void) const
1386 {
1387 return &m_accelerationStructureKHR.get();
1388 }
1389
prepareGeometries(const DeviceInterface & vk,const VkDevice device,std::vector<VkAccelerationStructureGeometryKHR> & accelerationStructureGeometriesKHR,std::vector<VkAccelerationStructureGeometryKHR * > & accelerationStructureGeometriesKHRPointers,std::vector<VkAccelerationStructureBuildRangeInfoKHR> & accelerationStructureBuildRangeInfoKHR,std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT> & accelerationStructureGeometryMicromapsEXT,std::vector<deUint32> & maxPrimitiveCounts,VkDeviceSize vertexBufferOffset,VkDeviceSize indexBufferOffset) const1390 void BottomLevelAccelerationStructureKHR::prepareGeometries (const DeviceInterface& vk,
1391 const VkDevice device,
1392 std::vector<VkAccelerationStructureGeometryKHR>& accelerationStructureGeometriesKHR,
1393 std::vector<VkAccelerationStructureGeometryKHR*>& accelerationStructureGeometriesKHRPointers,
1394 std::vector<VkAccelerationStructureBuildRangeInfoKHR>& accelerationStructureBuildRangeInfoKHR,
1395 std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT>& accelerationStructureGeometryMicromapsEXT,
1396 std::vector<deUint32>& maxPrimitiveCounts,
1397 VkDeviceSize vertexBufferOffset,
1398 VkDeviceSize indexBufferOffset) const
1399 {
1400 accelerationStructureGeometriesKHR.resize(m_geometriesData.size());
1401 accelerationStructureGeometriesKHRPointers.resize(m_geometriesData.size());
1402 accelerationStructureBuildRangeInfoKHR.resize(m_geometriesData.size());
1403 accelerationStructureGeometryMicromapsEXT.resize(m_geometriesData.size());
1404 maxPrimitiveCounts.resize(m_geometriesData.size());
1405
1406 for (size_t geometryNdx = 0; geometryNdx < m_geometriesData.size(); ++geometryNdx)
1407 {
1408 const de::SharedPtr<RaytracedGeometryBase>& geometryData = m_geometriesData[geometryNdx];
1409 VkDeviceOrHostAddressConstKHR vertexData, indexData;
1410 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1411 {
1412 if (getVertexBuffer() != DE_NULL)
1413 {
1414 vertexData = makeDeviceOrHostAddressConstKHR(vk, device, getVertexBuffer()->get(), vertexBufferOffset);
1415 if (m_indirectBuffer == DE_NULL )
1416 {
1417 vertexBufferOffset += deAlignSize(geometryData->getVertexByteSize(), 8);
1418 }
1419 }
1420 else
1421 vertexData = makeDeviceOrHostAddressConstKHR(DE_NULL);
1422
1423 if (getIndexBuffer() != DE_NULL && geometryData->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
1424 {
1425 indexData = makeDeviceOrHostAddressConstKHR(vk, device, getIndexBuffer()->get(), indexBufferOffset);
1426 indexBufferOffset += deAlignSize(geometryData->getIndexByteSize(), 8);
1427 }
1428 else
1429 indexData = makeDeviceOrHostAddressConstKHR(DE_NULL);
1430 }
1431 else
1432 {
1433 vertexData = makeDeviceOrHostAddressConstKHR(geometryData->getVertexPointer());
1434 if (geometryData->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
1435 indexData = makeDeviceOrHostAddressConstKHR(geometryData->getIndexPointer());
1436 else
1437 indexData = makeDeviceOrHostAddressConstKHR(DE_NULL);
1438 }
1439
1440 VkAccelerationStructureGeometryTrianglesDataKHR accelerationStructureGeometryTrianglesDataKHR =
1441 {
1442 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, // VkStructureType sType;
1443 DE_NULL, // const void* pNext;
1444 geometryData->getVertexFormat(), // VkFormat vertexFormat;
1445 vertexData, // VkDeviceOrHostAddressConstKHR vertexData;
1446 geometryData->getVertexStride(), // VkDeviceSize vertexStride;
1447 static_cast<deUint32>(geometryData->getVertexCount()), // uint32_t maxVertex;
1448 geometryData->getIndexType(), // VkIndexType indexType;
1449 indexData, // VkDeviceOrHostAddressConstKHR indexData;
1450 makeDeviceOrHostAddressConstKHR(DE_NULL), // VkDeviceOrHostAddressConstKHR transformData;
1451 };
1452
1453 if (geometryData->getHasOpacityMicromap())
1454 accelerationStructureGeometryTrianglesDataKHR.pNext = &geometryData->getOpacityMicromap();
1455
1456 const VkAccelerationStructureGeometryAabbsDataKHR accelerationStructureGeometryAabbsDataKHR =
1457 {
1458 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR, // VkStructureType sType;
1459 DE_NULL, // const void* pNext;
1460 vertexData, // VkDeviceOrHostAddressConstKHR data;
1461 geometryData->getAABBStride() // VkDeviceSize stride;
1462 };
1463 const VkAccelerationStructureGeometryDataKHR geometry = (geometryData->isTrianglesType())
1464 ? makeVkAccelerationStructureGeometryDataKHR(accelerationStructureGeometryTrianglesDataKHR)
1465 : makeVkAccelerationStructureGeometryDataKHR(accelerationStructureGeometryAabbsDataKHR);
1466 const VkAccelerationStructureGeometryKHR accelerationStructureGeometryKHR =
1467 {
1468 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // VkStructureType sType;
1469 DE_NULL, // const void* pNext;
1470 geometryData->getGeometryType(), // VkGeometryTypeKHR geometryType;
1471 geometry, // VkAccelerationStructureGeometryDataKHR geometry;
1472 geometryData->getGeometryFlags() // VkGeometryFlagsKHR flags;
1473 };
1474
1475 const deUint32 primitiveCount = (m_buildWithoutPrimitives ? 0u : geometryData->getPrimitiveCount());
1476
1477 const VkAccelerationStructureBuildRangeInfoKHR accelerationStructureBuildRangeInfosKHR =
1478 {
1479 primitiveCount, // deUint32 primitiveCount;
1480 0, // deUint32 primitiveOffset;
1481 0, // deUint32 firstVertex;
1482 0 // deUint32 firstTransform;
1483 };
1484
1485 accelerationStructureGeometriesKHR[geometryNdx] = accelerationStructureGeometryKHR;
1486 accelerationStructureGeometriesKHRPointers[geometryNdx] = &accelerationStructureGeometriesKHR[geometryNdx];
1487 accelerationStructureBuildRangeInfoKHR[geometryNdx] = accelerationStructureBuildRangeInfosKHR;
1488 maxPrimitiveCounts[geometryNdx] = geometryData->getPrimitiveCount();
1489 }
1490 }
1491
getRequiredAllocationCount(void)1492 deUint32 BottomLevelAccelerationStructure::getRequiredAllocationCount (void)
1493 {
1494 return BottomLevelAccelerationStructureKHR::getRequiredAllocationCount();
1495 }
1496
createAndBuild(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,Allocator & allocator,VkDeviceAddress deviceAddress)1497 void BottomLevelAccelerationStructure::createAndBuild (const DeviceInterface& vk,
1498 const VkDevice device,
1499 const VkCommandBuffer cmdBuffer,
1500 Allocator& allocator,
1501 VkDeviceAddress deviceAddress)
1502 {
1503 create(vk, device, allocator, 0u, deviceAddress);
1504 build(vk, device, cmdBuffer);
1505 }
1506
createAndCopyFrom(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,Allocator & allocator,BottomLevelAccelerationStructure * accelerationStructure,VkDeviceSize compactCopySize,VkDeviceAddress deviceAddress)1507 void BottomLevelAccelerationStructure::createAndCopyFrom (const DeviceInterface& vk,
1508 const VkDevice device,
1509 const VkCommandBuffer cmdBuffer,
1510 Allocator& allocator,
1511 BottomLevelAccelerationStructure* accelerationStructure,
1512 VkDeviceSize compactCopySize,
1513 VkDeviceAddress deviceAddress)
1514 {
1515 DE_ASSERT(accelerationStructure != NULL);
1516 VkDeviceSize copiedSize = compactCopySize > 0u ? compactCopySize : accelerationStructure->getStructureBuildSizes().accelerationStructureSize;
1517 DE_ASSERT(copiedSize != 0u);
1518
1519 create(vk, device, allocator, copiedSize, deviceAddress);
1520 copyFrom(vk, device, cmdBuffer, accelerationStructure, compactCopySize > 0u);
1521 }
1522
createAndDeserializeFrom(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer,Allocator & allocator,SerialStorage * storage,VkDeviceAddress deviceAddress)1523 void BottomLevelAccelerationStructure::createAndDeserializeFrom (const DeviceInterface& vk,
1524 const VkDevice device,
1525 const VkCommandBuffer cmdBuffer,
1526 Allocator& allocator,
1527 SerialStorage* storage,
1528 VkDeviceAddress deviceAddress )
1529 {
1530 DE_ASSERT(storage != NULL);
1531 DE_ASSERT(storage->getStorageSize() >= SerialStorage::SERIAL_STORAGE_SIZE_MIN);
1532 create(vk, device, allocator, storage->getDeserializedSize(), deviceAddress);
1533 deserialize(vk, device, cmdBuffer, storage);
1534 }
1535
makeBottomLevelAccelerationStructure()1536 de::MovePtr<BottomLevelAccelerationStructure> makeBottomLevelAccelerationStructure ()
1537 {
1538 return de::MovePtr<BottomLevelAccelerationStructure>(new BottomLevelAccelerationStructureKHR);
1539 }
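
// A minimal usage sketch, assuming a command buffer in the recording state
// and a "geometry" (de::SharedPtr<RaytracedGeometryBase>) prepared by the
// caller; addGeometry() is assumed to be one of the setters declared in the
// header:
//
//     de::MovePtr<BottomLevelAccelerationStructure> blas = makeBottomLevelAccelerationStructure();
//     blas->addGeometry(geometry);
//     blas->createAndBuild(vk, device, cmdBuffer, allocator, 0u);
//
// createAndBuild() is simply create() with an implementation-computed size
// followed by build(), as defined above.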

// Forward declaration
struct BottomLevelAccelerationStructurePoolImpl;

class BottomLevelAccelerationStructurePoolMember : public BottomLevelAccelerationStructureKHR
{
public:
    friend class BottomLevelAccelerationStructurePool;

                    BottomLevelAccelerationStructurePoolMember (BottomLevelAccelerationStructurePoolImpl& pool);
                    BottomLevelAccelerationStructurePoolMember (const BottomLevelAccelerationStructurePoolMember&) = delete;
                    BottomLevelAccelerationStructurePoolMember (BottomLevelAccelerationStructurePoolMember&&) = delete;
    virtual         ~BottomLevelAccelerationStructurePoolMember () = default;

    virtual void    create (const DeviceInterface&,
                            const VkDevice,
                            Allocator&,
                            VkDeviceSize,
                            VkDeviceAddress,
                            const void*,
                            const MemoryRequirement&) override
    {
        DE_ASSERT(0); // Must not be called: pool members are created by the pool via createAccelerationStructure().
    }
    virtual auto    computeBuildSize (const DeviceInterface& vk,
                                      const VkDevice         device,
                                      const VkDeviceSize     strSize) const
                                      // accStrSize, updateScratch, buildScratch, vertexSize, indexSize
                                      -> std::tuple<VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize>;
protected:
    struct Info;
    virtual void    preCreateSetSizesAndOffsets (const Info&        info,
                                                 const VkDeviceSize accStrSize,
                                                 const VkDeviceSize updateScratchSize,
                                                 const VkDeviceSize buildScratchSize);
    virtual void    createAccelerationStructure (const DeviceInterface& vk,
                                                 const VkDevice         device,
                                                 VkDeviceAddress        deviceAddress);

    virtual BufferWithMemory*     getAccelerationStructureBuffer () const override;
    virtual BufferWithMemory*     getDeviceScratchBuffer () const override;
    virtual std::vector<deUint8>* getHostScratchBuffer () const override;
    virtual BufferWithMemory*     getVertexBuffer () const override;
    virtual BufferWithMemory*     getIndexBuffer () const override;

    virtual VkDeviceSize getAccelerationStructureBufferOffset () const override { return m_info.accStrOffset; }
    virtual VkDeviceSize getDeviceScratchBufferOffset () const override { return m_info.buildScratchBuffOffset; }
    virtual VkDeviceSize getVertexBufferOffset () const override { return m_info.vertBuffOffset; }
    virtual VkDeviceSize getIndexBufferOffset () const override { return m_info.indexBuffOffset; }

    BottomLevelAccelerationStructurePoolImpl& m_pool;

    struct Info
    {
        deUint32     accStrIndex;
        VkDeviceSize accStrOffset;
        deUint32     vertBuffIndex;
        VkDeviceSize vertBuffOffset;
        deUint32     indexBuffIndex;
        VkDeviceSize indexBuffOffset;
        deUint32     buildScratchBuffIndex;
        VkDeviceSize buildScratchBuffOffset;
    } m_info;
};

template<class X> inline X negz (const X&)
{
    return (~static_cast<X>(0));
}
template<class X> inline bool isnegz (const X& x)
{
    return x == negz(x);
}
template<class Y> inline auto make_unsigned (const Y& y) -> typename std::make_unsigned<Y>::type
{
    return static_cast<typename std::make_unsigned<Y>::type>(y);
}
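
// Semantics of the helpers above, for reference: negz<X>() returns the
// all-bits-set value of X (e.g. negz<deUint32>(0) == 0xFFFFFFFFu), used below
// as a "not set"/"unlimited" sentinel, and isnegz() tests for it; batchCreate()
// passes negz<VkDeviceSize>(0) as maxBufferSize to mean "no size limit", and
// the pool member's buffer getters assert that an index is not the sentinel:
//
//     DE_ASSERT(isnegz(negz<VkDeviceSize>(0))); // holds: value is the sentinel
//     DE_ASSERT(!isnegz(VkDeviceSize(0)));      // holds: zero is a real value
//
// make_unsigned() merely casts a value to the unsigned counterpart of its
// type, e.g. make_unsigned(deInt64(-1)) == ~0ull.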

BottomLevelAccelerationStructurePoolMember::BottomLevelAccelerationStructurePoolMember (BottomLevelAccelerationStructurePoolImpl& pool)
    : m_pool (pool)
    , m_info {}
{
}

struct BottomLevelAccelerationStructurePoolImpl
{
    BottomLevelAccelerationStructurePoolImpl (BottomLevelAccelerationStructurePoolImpl&&) = delete;
    BottomLevelAccelerationStructurePoolImpl (const BottomLevelAccelerationStructurePoolImpl&) = delete;
    BottomLevelAccelerationStructurePoolImpl (BottomLevelAccelerationStructurePool& pool);

    BottomLevelAccelerationStructurePool&        m_pool;
    std::vector<de::SharedPtr<BufferWithMemory>> m_accelerationStructureBuffers;
    de::SharedPtr<BufferWithMemory>              m_deviceScratchBuffer;
    de::UniquePtr<std::vector<deUint8>>          m_hostScratchBuffer;
    std::vector<de::SharedPtr<BufferWithMemory>> m_vertexBuffers;
    std::vector<de::SharedPtr<BufferWithMemory>> m_indexBuffers;
};
BottomLevelAccelerationStructurePoolImpl::BottomLevelAccelerationStructurePoolImpl (BottomLevelAccelerationStructurePool& pool)
    : m_pool                         (pool)
    , m_accelerationStructureBuffers ()
    , m_deviceScratchBuffer          ()
    , m_hostScratchBuffer            (new std::vector<deUint8>)
    , m_vertexBuffers                ()
    , m_indexBuffers                 ()
{
}
BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getAccelerationStructureBuffer () const
{
    BufferWithMemory* result = nullptr;
    if (m_pool.m_accelerationStructureBuffers.size())
    {
        DE_ASSERT(!isnegz(m_info.accStrIndex));
        result = m_pool.m_accelerationStructureBuffers[m_info.accStrIndex].get();
    }
    return result;
}
BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getDeviceScratchBuffer () const
{
    DE_ASSERT(m_info.buildScratchBuffIndex == 0);
    return m_pool.m_deviceScratchBuffer.get();
}
std::vector<deUint8>* BottomLevelAccelerationStructurePoolMember::getHostScratchBuffer () const
{
    return this->m_buildScratchSize ? m_pool.m_hostScratchBuffer.get() : nullptr;
}

BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getVertexBuffer () const
{
    BufferWithMemory* result = nullptr;
    if (m_pool.m_vertexBuffers.size())
    {
        DE_ASSERT(!isnegz(m_info.vertBuffIndex));
        result = m_pool.m_vertexBuffers[m_info.vertBuffIndex].get();
    }
    return result;
}
BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getIndexBuffer () const
{
    BufferWithMemory* result = nullptr;
    if (m_pool.m_indexBuffers.size())
    {
        DE_ASSERT(!isnegz(m_info.indexBuffIndex));
        result = m_pool.m_indexBuffers[m_info.indexBuffIndex].get();
    }
    return result;
}

struct BottomLevelAccelerationStructurePool::Impl : BottomLevelAccelerationStructurePoolImpl
{
    friend class BottomLevelAccelerationStructurePool;
    friend class BottomLevelAccelerationStructurePoolMember;

    Impl (BottomLevelAccelerationStructurePool& pool)
        : BottomLevelAccelerationStructurePoolImpl(pool) { }
};

BottomLevelAccelerationStructurePool::BottomLevelAccelerationStructurePool ()
    : m_batchStructCount   (4)
    , m_batchGeomCount     (0)
    , m_infos              ()
    , m_structs            ()
    , m_createOnce         (false)
    , m_tryCachedMemory    (true)
    , m_structsBuffSize    (0)
    , m_updatesScratchSize (0)
    , m_buildsScratchSize  (0)
    , m_verticesSize       (0)
    , m_indicesSize        (0)
    , m_impl               (new Impl(*this))
{
}

BottomLevelAccelerationStructurePool::~BottomLevelAccelerationStructurePool ()
{
    delete m_impl;
}

void BottomLevelAccelerationStructurePool::batchStructCount (const deUint32& value)
{
    DE_ASSERT(value >= 1);
    m_batchStructCount = value;
}

auto BottomLevelAccelerationStructurePool::add (VkDeviceSize    structureSize,
                                                VkDeviceAddress deviceAddress) -> BottomLevelAccelerationStructurePool::BlasPtr
{
    // Prevent a programmer from calling this method after the batchCreate(...) method has been called.
    if (m_createOnce) DE_ASSERT(0);

    auto blas = new BottomLevelAccelerationStructurePoolMember(*m_impl);
    m_infos.push_back({structureSize, deviceAddress});
    m_structs.emplace_back(blas);
    return m_structs.back();
}

void adjustBatchCount (const DeviceInterface&                                             vkd,
                       const VkDevice                                                     device,
                       const std::vector<BottomLevelAccelerationStructurePool::BlasPtr>&  structs,
                       const std::vector<BottomLevelAccelerationStructurePool::BlasInfo>& infos,
                       const VkDeviceSize                                                 maxBufferSize,
                       deUint32                                                           (&result)[4])
{
    tcu::Vector<VkDeviceSize, 4> sizes   (0);
    tcu::Vector<VkDeviceSize, 4> sums    (0);
    tcu::Vector<deUint32, 4>     tmps    (0);
    tcu::Vector<deUint32, 4>     batches (0);

    VkDeviceSize updateScratchSize = 0; static_cast<void>(updateScratchSize); // not used yet; reserved for a future implementation

    auto updateIf = [&](deUint32 c)
    {
        if (sums[c] + sizes[c] <= maxBufferSize)
        {
            sums[c] += sizes[c];
            tmps[c] += 1;

            batches[c] = std::max(tmps[c], batches[c]);
        }
        else
        {
            sums[c] = 0;
            tmps[c] = 0;
        }
    };

    const deUint32 maxIter = static_cast<deUint32>(structs.size());
    for (deUint32 i = 0; i < maxIter; ++i)
    {
        auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(structs[i].get());
        std::tie(sizes[0], updateScratchSize, sizes[1], sizes[2], sizes[3]) = str.computeBuildSize(vkd, device, infos[i].structureSize);

        updateIf(0);
        updateIf(1);
        updateIf(2);
        updateIf(3);
    }

    result[0] = std::max(batches[0], 1u);
    result[1] = std::max(batches[1], 1u);
    result[2] = std::max(batches[2], 1u);
    result[3] = std::max(batches[3], 1u);
}
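
// A rough illustration of the greedy packing above: assuming
// maxBufferSize = 1 MiB and three structures whose sizes in category 0 are
// 400 KiB each, the running sum accepts the first two (tmps[0] reaches 2),
// then overflows and resets on the third, so result[0] ends up as 2
// structures per buffer. Each of the four categories (0: acceleration
// structures, 1: build scratch, 2: vertices, 3: indices) is packed
// independently, and a category that never fits still reports at least 1.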

size_t BottomLevelAccelerationStructurePool::getAllocationCount () const
{
    return m_impl->m_accelerationStructureBuffers.size()
           + m_impl->m_vertexBuffers.size()
           + m_impl->m_indexBuffers.size()
           + 1 /* for scratch buffer */;
}

size_t BottomLevelAccelerationStructurePool::getAllocationCount (const DeviceInterface& vk,
                                                                 const VkDevice         device,
                                                                 const VkDeviceSize     maxBufferSize) const
{
    DE_ASSERT(m_structs.size() != 0);

    std::map<deUint32, VkDeviceSize> accStrSizes;
    std::map<deUint32, VkDeviceSize> vertBuffSizes;
    std::map<deUint32, VkDeviceSize> indexBuffSizes;
    std::map<deUint32, VkDeviceSize> scratchBuffSizes;

    const deUint32 allStructsCount = structCount();

    deUint32 batchStructCount  = m_batchStructCount;
    deUint32 batchScratchCount = m_batchStructCount;
    deUint32 batchVertexCount  = m_batchGeomCount ? m_batchGeomCount : m_batchStructCount;
    deUint32 batchIndexCount   = batchVertexCount;

    if (!isnegz(maxBufferSize))
    {
        deUint32 batches[4];
        adjustBatchCount(vk, device, m_structs, m_infos, maxBufferSize, batches);
        batchStructCount  = batches[0];
        batchScratchCount = batches[1];
        batchVertexCount  = batches[2];
        batchIndexCount   = batches[3];
    }

    deUint32 iStr     = 0;
    deUint32 iScratch = 0;
    deUint32 iVertex  = 0;
    deUint32 iIndex   = 0;

    VkDeviceSize strSize           = 0;
    VkDeviceSize updateScratchSize = 0;
    VkDeviceSize buildScratchSize  = 0;
    VkDeviceSize vertexSize        = 0;
    VkDeviceSize indexSize         = 0;

    for (; iStr < allStructsCount; ++iStr)
    {
        auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[iStr].get());
        std::tie(strSize, updateScratchSize, buildScratchSize, vertexSize, indexSize) = str.computeBuildSize(vk, device, m_infos[iStr].structureSize);

        {
            const VkDeviceSize alignedStrSize = deAlign64(strSize, 256);
            const deUint32     accStrIndex    = (iStr / batchStructCount);
            accStrSizes[accStrIndex] += alignedStrSize;
        }

        if (buildScratchSize != 0)
        {
            const VkDeviceSize alignedBuildScratchSize = deAlign64(buildScratchSize, 256);
            const deUint32     scratchBuffIndex        = (iScratch / batchScratchCount);
            scratchBuffSizes[scratchBuffIndex] += alignedBuildScratchSize;
            iScratch += 1;
        }

        if (vertexSize != 0)
        {
            const VkDeviceSize alignedVertBuffSize = deAlign64(vertexSize, 8);
            const deUint32     vertBuffIndex       = (iVertex / batchVertexCount);
            vertBuffSizes[vertBuffIndex] += alignedVertBuffSize;
            iVertex += 1;
        }

        if (indexSize != 0)
        {
            const VkDeviceSize alignedIndexBuffSize = deAlign64(indexSize, 8);
            const deUint32     indexBuffIndex       = (iIndex / batchIndexCount);
            indexBuffSizes[indexBuffIndex] += alignedIndexBuffSize;
            iIndex += 1;
        }
    }

    return accStrSizes.size()
           + vertBuffSizes.size()
           + indexBuffSizes.size()
           + scratchBuffSizes.size();
}

tcu::Vector<VkDeviceSize, 4> BottomLevelAccelerationStructurePool::getAllocationSizes (const DeviceInterface& vk,
                                                                                       const VkDevice         device) const
{
    if (m_structsBuffSize)
    {
        return tcu::Vector<VkDeviceSize, 4>(m_structsBuffSize, m_buildsScratchSize, m_verticesSize, m_indicesSize);
    }

    VkDeviceSize strSize              = 0;
    VkDeviceSize updateScratchSize    = 0; static_cast<void>(updateScratchSize);    // not used yet; reserved for a future implementation
    VkDeviceSize buildScratchSize     = 0;
    VkDeviceSize vertexSize           = 0;
    VkDeviceSize indexSize            = 0;
    VkDeviceSize sumStrSize           = 0;
    VkDeviceSize sumUpdateScratchSize = 0; static_cast<void>(sumUpdateScratchSize); // not used yet; reserved for a future implementation
    VkDeviceSize sumBuildScratchSize  = 0;
    VkDeviceSize sumVertexSize        = 0;
    VkDeviceSize sumIndexSize         = 0;
    for (size_t i = 0; i < structCount(); ++i)
    {
        auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[i].get());
        std::tie(strSize, updateScratchSize, buildScratchSize, vertexSize, indexSize) = str.computeBuildSize(vk, device, m_infos[i].structureSize);
        sumStrSize          += deAlign64(strSize, 256);
        //sumUpdateScratchSize += deAlign64(updateScratchSize, 256); // not used yet; reserved for a future implementation
        sumBuildScratchSize += deAlign64(buildScratchSize, 256);
        sumVertexSize       += deAlign64(vertexSize, 8);
        sumIndexSize        += deAlign64(indexSize, 8);
    }
    return tcu::Vector<VkDeviceSize, 4>(sumStrSize, sumBuildScratchSize, sumVertexSize, sumIndexSize);
}

void BottomLevelAccelerationStructurePool::batchCreate (const DeviceInterface& vkd,
                                                        const VkDevice         device,
                                                        Allocator&             allocator)
{
    batchCreateAdjust(vkd, device, allocator, negz<VkDeviceSize>(0));
}

void BottomLevelAccelerationStructurePool::batchCreateAdjust (const DeviceInterface& vkd,
                                                              const VkDevice         device,
                                                              Allocator&             allocator,
                                                              const VkDeviceSize     maxBufferSize)
{
    // Prevent a programmer from calling this method more than once.
    if (m_createOnce) DE_ASSERT(0);

    m_createOnce = true;
    DE_ASSERT(m_structs.size() != 0);

    auto createAccelerationStructureBuffer = [&](VkDeviceSize bufferSize) -> typename std::add_pointer<BufferWithMemory>::type
    {
        BufferWithMemory*        res = nullptr;
        const VkBufferCreateInfo bci = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);

        if (m_tryCachedMemory) try
        {
            res = new BufferWithMemory(vkd, device, allocator, bci, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
        }
        catch (const tcu::NotSupportedError&)
        {
            res = nullptr;
        }

        return (nullptr != res)
            ? res
            : (new BufferWithMemory(vkd, device, allocator, bci, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
    };

    auto createDeviceScratchBuffer = [&](VkDeviceSize bufferSize) -> de::SharedPtr<BufferWithMemory>
    {
        const VkBufferCreateInfo bci = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
        BufferWithMemory*        p   = new BufferWithMemory(vkd, device, allocator, bci, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
        return de::SharedPtr<BufferWithMemory>(p);
    };

    std::map<deUint32, VkDeviceSize> accStrSizes;
    std::map<deUint32, VkDeviceSize> vertBuffSizes;
    std::map<deUint32, VkDeviceSize> indexBuffSizes;

    const deUint32 allStructsCount = structCount();
    deUint32       iterKey         = 0;

    deUint32 batchStructCount = m_batchStructCount;
    deUint32 batchVertexCount = m_batchGeomCount ? m_batchGeomCount : m_batchStructCount;
    deUint32 batchIndexCount  = batchVertexCount;

    if (!isnegz(maxBufferSize))
    {
        deUint32 batches[4];
        adjustBatchCount(vkd, device, m_structs, m_infos, maxBufferSize, batches);
        batchStructCount = batches[0];
        // batches[1]: batchScratchCount
        batchVertexCount = batches[2];
        batchIndexCount  = batches[3];
    }

    deUint32 iStr    = 0;
    deUint32 iVertex = 0;
    deUint32 iIndex  = 0;

    VkDeviceSize strSize             = 0;
    VkDeviceSize updateScratchSize   = 0;
    VkDeviceSize buildScratchSize    = 0;
    VkDeviceSize maxBuildScratchSize = 0;
    VkDeviceSize vertexSize          = 0;
    VkDeviceSize indexSize           = 0;

    VkDeviceSize strOffset    = 0;
    VkDeviceSize vertexOffset = 0;
    VkDeviceSize indexOffset  = 0;

    deUint32 hostStructCount   = 0;
    deUint32 deviceStructCount = 0;

    for (; iStr < allStructsCount; ++iStr)
    {
        BottomLevelAccelerationStructurePoolMember::Info info{};
        auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[iStr].get());
        std::tie(strSize, updateScratchSize, buildScratchSize, vertexSize, indexSize) = str.computeBuildSize(vkd, device, m_infos[iStr].structureSize);

        ++(str.getBuildType() == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR ? hostStructCount : deviceStructCount);

        {
            const VkDeviceSize alignedStrSize = deAlign64(strSize, 256);
            const deUint32     accStrIndex    = (iStr / batchStructCount);
            if (iStr != 0 && (iStr % batchStructCount) == 0)
            {
                strOffset = 0;
            }

            info.accStrIndex  = accStrIndex;
            info.accStrOffset = strOffset;
            accStrSizes[accStrIndex] += alignedStrSize;
            strOffset         += alignedStrSize;
            m_structsBuffSize += alignedStrSize;
        }

        if (buildScratchSize != 0)
        {
            maxBuildScratchSize = std::max(maxBuildScratchSize, make_unsigned(deAlign64(buildScratchSize, 256u)));

            info.buildScratchBuffIndex  = 0;
            info.buildScratchBuffOffset = 0;
        }

        if (vertexSize != 0)
        {
            const VkDeviceSize alignedVertBuffSize = deAlign64(vertexSize, 8);
            const deUint32     vertBuffIndex       = (iVertex / batchVertexCount);
            if (iVertex != 0 && (iVertex % batchVertexCount) == 0)
            {
                vertexOffset = 0;
            }

            info.vertBuffIndex  = vertBuffIndex;
            info.vertBuffOffset = vertexOffset;
            vertBuffSizes[vertBuffIndex] += alignedVertBuffSize;
            vertexOffset   += alignedVertBuffSize;
            m_verticesSize += alignedVertBuffSize;
            iVertex += 1;
        }

        if (indexSize != 0)
        {
            const VkDeviceSize alignedIndexBuffSize = deAlign64(indexSize, 8);
            const deUint32     indexBuffIndex       = (iIndex / batchIndexCount);
            if (iIndex != 0 && (iIndex % batchIndexCount) == 0)
            {
                indexOffset = 0;
            }

            info.indexBuffIndex  = indexBuffIndex;
            info.indexBuffOffset = indexOffset;
            indexBuffSizes[indexBuffIndex] += alignedIndexBuffSize;
            indexOffset   += alignedIndexBuffSize;
            m_indicesSize += alignedIndexBuffSize;
            iIndex += 1;
        }

        str.preCreateSetSizesAndOffsets(info, strSize, updateScratchSize, buildScratchSize);
    }

    for (iterKey = 0; iterKey < static_cast<deUint32>(accStrSizes.size()); ++iterKey)
    {
        m_impl->m_accelerationStructureBuffers.emplace_back(createAccelerationStructureBuffer(accStrSizes.at(iterKey)));
    }
    for (iterKey = 0; iterKey < static_cast<deUint32>(vertBuffSizes.size()); ++iterKey)
    {
        m_impl->m_vertexBuffers.emplace_back(createVertexBuffer(vkd, device, allocator, vertBuffSizes.at(iterKey)));
    }
    for (iterKey = 0; iterKey < static_cast<deUint32>(indexBuffSizes.size()); ++iterKey)
    {
        m_impl->m_indexBuffers.emplace_back(createIndexBuffer(vkd, device, allocator, indexBuffSizes.at(iterKey)));
    }

    if (maxBuildScratchSize)
    {
        if (hostStructCount)   m_impl->m_hostScratchBuffer->resize(static_cast<size_t>(maxBuildScratchSize));
        if (deviceStructCount) m_impl->m_deviceScratchBuffer = createDeviceScratchBuffer(maxBuildScratchSize);

        m_buildsScratchSize = maxBuildScratchSize;
    }

    for (iterKey = 0; iterKey < allStructsCount; ++iterKey)
    {
        auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[iterKey].get());
        str.createAccelerationStructure(vkd, device, m_infos[iterKey].deviceAddress);
    }
}
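
// A minimal pool usage sketch (assumed setup; geometry must be attached to
// each member before batchCreate(), because computeBuildSize() derives the
// sizes from it when the structure size passed to add() is zero):
//
//     BottomLevelAccelerationStructurePool pool;
//     auto blas0 = pool.add(0u, 0u);  // BlasPtr; attach geometry as for any BLAS
//     auto blas1 = pool.add(0u, 0u);
//     pool.batchCreate(vkd, device, allocator);     // allocates shared buffers, creates all structures
//     pool.batchBuild(vkd, device, cmdPool, queue); // host builds run inline; device builds are submitted in batches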

void BottomLevelAccelerationStructurePool::batchBuild (const DeviceInterface& vk,
                                                       const VkDevice         device,
                                                       VkCommandBuffer        cmdBuffer)
{
    for (const auto& str : m_structs)
    {
        str->build(vk, device, cmdBuffer);
    }
}

void BottomLevelAccelerationStructurePool::batchBuild (const DeviceInterface& vk,
                                                       const VkDevice         device,
                                                       VkCommandPool          cmdPool,
                                                       VkQueue                queue)
{
    const deUint32       limit = 10000u;
    const deUint32       count = structCount();
    std::vector<BlasPtr> buildingOnDevice;

    auto buildOnDevice = [&]() -> void
    {
        Move<VkCommandBuffer> cmd = allocateCommandBuffer(vk, device, cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

        beginCommandBuffer(vk, *cmd, 0u);
        for (const auto& str : buildingOnDevice)
            str->build(vk, device, *cmd);
        endCommandBuffer(vk, *cmd);

        submitCommandsAndWait(vk, device, queue, *cmd);
        vk.resetCommandPool(device, cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
    };

    buildingOnDevice.reserve(limit);
    for (deUint32 i = 0; i < count; ++i)
    {
        auto str = m_structs[i];

        if (str->getBuildType() == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR)
            str->build(vk, device, DE_NULL);
        else
            buildingOnDevice.emplace_back(str);

        if (buildingOnDevice.size() == limit || (count - 1) == i)
        {
            buildOnDevice();
            buildingOnDevice.clear();
        }
    }
}

auto BottomLevelAccelerationStructurePoolMember::computeBuildSize (const DeviceInterface& vk,
                                                                   const VkDevice         device,
                                                                   const VkDeviceSize     strSize) const
                                                                   // accStrSize, updateScratch, buildScratch, vertexSize, indexSize
                                                                   -> std::tuple<VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize>
{
    DE_ASSERT(!m_geometriesData.empty() != !(strSize == 0)); // logical XOR: either geometry data or an explicit size must be given, never both

    std::tuple<VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize> result(deAlign64(strSize, 256), 0, 0, 0, 0);

    if (!m_geometriesData.empty())
    {
        std::vector<VkAccelerationStructureGeometryKHR>                 accelerationStructureGeometriesKHR;
        std::vector<VkAccelerationStructureGeometryKHR*>                accelerationStructureGeometriesKHRPointers;
        std::vector<VkAccelerationStructureBuildRangeInfoKHR>           accelerationStructureBuildRangeInfoKHR;
        std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT> accelerationStructureGeometryMicromapsEXT;
        std::vector<deUint32>                                           maxPrimitiveCounts;
        prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers, accelerationStructureBuildRangeInfoKHR, accelerationStructureGeometryMicromapsEXT, maxPrimitiveCounts);

        const VkAccelerationStructureGeometryKHR*        accelerationStructureGeometriesKHRPointer = accelerationStructureGeometriesKHR.data();
        const VkAccelerationStructureGeometryKHR* const* accelerationStructureGeometry             = accelerationStructureGeometriesKHRPointers.data();

        VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR =
        {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,           // VkStructureType sType;
            DE_NULL,                                                                    // const void* pNext;
            VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,                            // VkAccelerationStructureTypeKHR type;
            m_buildFlags,                                                               // VkBuildAccelerationStructureFlagsKHR flags;
            VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,                             // VkBuildAccelerationStructureModeKHR mode;
            DE_NULL,                                                                    // VkAccelerationStructureKHR srcAccelerationStructure;
            DE_NULL,                                                                    // VkAccelerationStructureKHR dstAccelerationStructure;
            static_cast<deUint32>(accelerationStructureGeometriesKHR.size()),           // deUint32 geometryCount;
            m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer, // const VkAccelerationStructureGeometryKHR* pGeometries;
            m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL,             // const VkAccelerationStructureGeometryKHR* const* ppGeometries;
            makeDeviceOrHostAddressKHR(DE_NULL)                                         // VkDeviceOrHostAddressKHR scratchData;
        };

        VkAccelerationStructureBuildSizesInfoKHR sizeInfo =
        {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, // VkStructureType sType;
            DE_NULL,                                                       // const void* pNext;
            0,                                                             // VkDeviceSize accelerationStructureSize;
            0,                                                             // VkDeviceSize updateScratchSize;
            0                                                              // VkDeviceSize buildScratchSize;
        };

        vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);

        std::get<0>(result) = sizeInfo.accelerationStructureSize;
        std::get<1>(result) = sizeInfo.updateScratchSize;
        std::get<2>(result) = sizeInfo.buildScratchSize;
        std::get<3>(result) = getVertexBufferSize(m_geometriesData);
        std::get<4>(result) = getIndexBufferSize(m_geometriesData);
    }

    return result;
}

void BottomLevelAccelerationStructurePoolMember::preCreateSetSizesAndOffsets (const Info&        info,
                                                                              const VkDeviceSize accStrSize,
                                                                              const VkDeviceSize updateScratchSize,
                                                                              const VkDeviceSize buildScratchSize)
{
    m_info              = info;
    m_structureSize     = accStrSize;
    m_updateScratchSize = updateScratchSize;
    m_buildScratchSize  = buildScratchSize;
}

void BottomLevelAccelerationStructurePoolMember::createAccelerationStructure (const DeviceInterface& vk,
                                                                              const VkDevice         device,
                                                                              VkDeviceAddress        deviceAddress)
{
    const VkAccelerationStructureTypeKHR structureType = (m_createGeneric
                                                          ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
                                                          : VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
    const VkAccelerationStructureCreateInfoKHR accelerationStructureCreateInfoKHR
    {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR, // VkStructureType sType;
        DE_NULL,                                                  // const void* pNext;
        m_createFlags,                                            // VkAccelerationStructureCreateFlagsKHR createFlags;
        getAccelerationStructureBuffer()->get(),                  // VkBuffer buffer;
        getAccelerationStructureBufferOffset(),                   // VkDeviceSize offset;
        m_structureSize,                                          // VkDeviceSize size;
        structureType,                                            // VkAccelerationStructureTypeKHR type;
        deviceAddress                                             // VkDeviceAddress deviceAddress;
    };

    m_accelerationStructureKHR = createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);
}

TopLevelAccelerationStructure::~TopLevelAccelerationStructure ()
{
}

TopLevelAccelerationStructure::TopLevelAccelerationStructure ()
    : m_structureSize     (0u)
    , m_updateScratchSize (0u)
    , m_buildScratchSize  (0u)
{
}

void TopLevelAccelerationStructure::setInstanceCount (const size_t instanceCount)
{
    m_bottomLevelInstances.reserve(instanceCount);
    m_instanceData.reserve(instanceCount);
}

void TopLevelAccelerationStructure::addInstance (de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelStructure,
                                                 const VkTransformMatrixKHR&                     matrix,
                                                 deUint32                                        instanceCustomIndex,
                                                 deUint32                                        mask,
                                                 deUint32                                        instanceShaderBindingTableRecordOffset,
                                                 VkGeometryInstanceFlagsKHR                      flags)
{
    m_bottomLevelInstances.push_back(bottomLevelStructure);
    m_instanceData.push_back(InstanceData(matrix, instanceCustomIndex, mask, instanceShaderBindingTableRecordOffset, flags));
}
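
// A minimal usage sketch, assuming "blas" is a built
// de::SharedPtr<BottomLevelAccelerationStructure>, "identityMatrix3x4" is an
// identity VkTransformMatrixKHR supplied by the caller, and
// makeTopLevelAccelerationStructure() is the factory defined later in this
// file:
//
//     de::MovePtr<TopLevelAccelerationStructure> tlas = makeTopLevelAccelerationStructure();
//     tlas->setInstanceCount(1);
//     tlas->addInstance(blas, identityMatrix3x4, 0u, 0xFFu, 0u, 0u);
//     tlas->createAndBuild(vk, device, cmdBuffer, allocator, 0u);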

VkAccelerationStructureBuildSizesInfoKHR TopLevelAccelerationStructure::getStructureBuildSizes () const
{
    return
    {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, // VkStructureType sType;
        DE_NULL,                                                       // const void* pNext;
        m_structureSize,                                               // VkDeviceSize accelerationStructureSize;
        m_updateScratchSize,                                           // VkDeviceSize updateScratchSize;
        m_buildScratchSize                                             // VkDeviceSize buildScratchSize;
    };
}

void TopLevelAccelerationStructure::createAndBuild (const DeviceInterface& vk,
                                                    const VkDevice         device,
                                                    const VkCommandBuffer  cmdBuffer,
                                                    Allocator&             allocator,
                                                    VkDeviceAddress        deviceAddress)
{
    create(vk, device, allocator, 0u, deviceAddress);
    build(vk, device, cmdBuffer);
}

void TopLevelAccelerationStructure::createAndCopyFrom (const DeviceInterface&         vk,
                                                       const VkDevice                 device,
                                                       const VkCommandBuffer          cmdBuffer,
                                                       Allocator&                     allocator,
                                                       TopLevelAccelerationStructure* accelerationStructure,
                                                       VkDeviceSize                   compactCopySize,
                                                       VkDeviceAddress                deviceAddress)
{
    DE_ASSERT(accelerationStructure != DE_NULL);
    VkDeviceSize copiedSize = compactCopySize > 0u ? compactCopySize : accelerationStructure->getStructureBuildSizes().accelerationStructureSize;
    DE_ASSERT(copiedSize != 0u);

    create(vk, device, allocator, copiedSize, deviceAddress);
    copyFrom(vk, device, cmdBuffer, accelerationStructure, compactCopySize > 0u);
}

void TopLevelAccelerationStructure::createAndDeserializeFrom (const DeviceInterface& vk,
                                                              const VkDevice         device,
                                                              const VkCommandBuffer  cmdBuffer,
                                                              Allocator&             allocator,
                                                              SerialStorage*         storage,
                                                              VkDeviceAddress        deviceAddress)
{
    DE_ASSERT(storage != DE_NULL);
    DE_ASSERT(storage->getStorageSize() >= SerialStorage::SERIAL_STORAGE_SIZE_MIN);
    create(vk, device, allocator, storage->getDeserializedSize(), deviceAddress);
    if (storage->hasDeepFormat()) createAndDeserializeBottoms(vk, device, cmdBuffer, allocator, storage);
    deserialize(vk, device, cmdBuffer, storage);
}

BufferWithMemory* createInstanceBuffer (const DeviceInterface&                                        vk,
                                        const VkDevice                                                device,
                                        Allocator&                                                    allocator,
                                        std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> bottomLevelInstances,
                                        std::vector<InstanceData>                                     instanceData,
                                        const bool                                                    tryCachedMemory)
{
    DE_ASSERT(bottomLevelInstances.size() != 0);
    DE_ASSERT(bottomLevelInstances.size() == instanceData.size());
    DE_UNREF(instanceData);

    BufferWithMemory*        result           = nullptr;
    const VkDeviceSize       bufferSizeBytes  = bottomLevelInstances.size() * sizeof(VkAccelerationStructureInstanceKHR);
    const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
    if (tryCachedMemory) try
    {
        result = new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
    }
    catch (const tcu::NotSupportedError&)
    {
        result = nullptr;
    }
    return result
        ? result
        : new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
}

void updateSingleInstance (const DeviceInterface&                  vk,
                           const VkDevice                          device,
                           const BottomLevelAccelerationStructure& bottomLevelAccelerationStructure,
                           const InstanceData&                     instanceData,
                           deUint8*                                bufferLocation,
                           VkAccelerationStructureBuildTypeKHR     buildType,
                           bool                                    inactiveInstances)
{
    const VkAccelerationStructureKHR accelerationStructureKHR = *bottomLevelAccelerationStructure.getPtr();

    // This part needs to be revisited once a new version of VkAccelerationStructureInstanceKHR is added to vkStructTypes.inl.
    VkDeviceAddress accelerationStructureAddress = 0;
    if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        VkAccelerationStructureDeviceAddressInfoKHR asDeviceAddressInfo =
        {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR, // VkStructureType sType;
            DE_NULL,                                                          // const void* pNext;
            accelerationStructureKHR                                          // VkAccelerationStructureKHR accelerationStructure;
        };
        accelerationStructureAddress = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
    }

    deUint64 structureReference;
    if (inactiveInstances)
    {
        // Instances are marked inactive by giving them a null reference: a zero device address for device builds or a VK_NULL_HANDLE structure for host builds.
        structureReference = 0ull;
    }
    else
    {
        structureReference = (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
                             ? deUint64(accelerationStructureAddress)
                             : deUint64(accelerationStructureKHR.getInternal());
    }

    VkAccelerationStructureInstanceKHR accelerationStructureInstanceKHR = makeVkAccelerationStructureInstanceKHR
    (
        instanceData.matrix,                                 // VkTransformMatrixKHR transform;
        instanceData.instanceCustomIndex,                    // deUint32 instanceCustomIndex:24;
        instanceData.mask,                                   // deUint32 mask:8;
        instanceData.instanceShaderBindingTableRecordOffset, // deUint32 instanceShaderBindingTableRecordOffset:24;
        instanceData.flags,                                  // VkGeometryInstanceFlagsKHR flags:8;
        structureReference                                   // deUint64 accelerationStructureReference;
    );

    deMemcpy(bufferLocation, &accelerationStructureInstanceKHR, sizeof(VkAccelerationStructureInstanceKHR));
}

void updateInstanceBuffer (const DeviceInterface&                                               vk,
                           const VkDevice                                                       device,
                           const std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>& bottomLevelInstances,
                           const std::vector<InstanceData>&                                     instanceData,
                           const BufferWithMemory*                                              instanceBuffer,
                           VkAccelerationStructureBuildTypeKHR                                  buildType,
                           bool                                                                 inactiveInstances)
{
    DE_ASSERT(bottomLevelInstances.size() != 0);
    DE_ASSERT(bottomLevelInstances.size() == instanceData.size());

    auto&        instancesAlloc = instanceBuffer->getAllocation();
    auto         bufferStart    = reinterpret_cast<deUint8*>(instancesAlloc.getHostPtr());
    VkDeviceSize bufferOffset   = 0ull;

    for (size_t instanceNdx = 0; instanceNdx < bottomLevelInstances.size(); ++instanceNdx)
    {
        const auto& blas = *bottomLevelInstances[instanceNdx];
        updateSingleInstance(vk, device, blas, instanceData[instanceNdx], bufferStart + bufferOffset, buildType, inactiveInstances);
        bufferOffset += sizeof(VkAccelerationStructureInstanceKHR);
    }

    flushMappedMemoryRange(vk, device, instancesAlloc.getMemory(), instancesAlloc.getOffset(), VK_WHOLE_SIZE);
}
2402
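// Illustrative sketch (compile-guarded): refreshing every instance record after mutating
// the host-side InstanceData vector; TopLevelAccelerationStructureKHR::build() below calls
// this helper internally before recording the build. Names are hypothetical.
#if 0
static void exampleRefreshInstances (const DeviceInterface&                                               vkd,
                                     const VkDevice                                                       device,
                                     const std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>& blases,
                                     const std::vector<InstanceData>&                                     instanceData,
                                     const BufferWithMemory*                                              instanceBuffer)
{
    updateInstanceBuffer(vkd, device, blases, instanceData, instanceBuffer,
                         VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, false /* inactiveInstances */);
}
#endif
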
class TopLevelAccelerationStructureKHR : public TopLevelAccelerationStructure
{
public:
    static deUint32 getRequiredAllocationCount (void);

                    TopLevelAccelerationStructureKHR    ();
                    TopLevelAccelerationStructureKHR    (const TopLevelAccelerationStructureKHR& other) = delete;
    virtual        ~TopLevelAccelerationStructureKHR    ();

    void            setBuildType                        (const VkAccelerationStructureBuildTypeKHR buildType) override;
    void            setCreateFlags                      (const VkAccelerationStructureCreateFlagsKHR createFlags) override;
    void            setCreateGeneric                    (bool createGeneric) override;
    void            setBuildFlags                       (const VkBuildAccelerationStructureFlagsKHR buildFlags) override;
    void            setBuildWithoutPrimitives           (bool buildWithoutPrimitives) override;
    void            setInactiveInstances                (bool inactiveInstances) override;
    void            setDeferredOperation                (const bool deferredOperation,
                                                         const deUint32 workerThreadCount) override;
    void            setUseArrayOfPointers               (const bool useArrayOfPointers) override;
    void            setIndirectBuildParameters          (const VkBuffer indirectBuffer,
                                                         const VkDeviceSize indirectBufferOffset,
                                                         const deUint32 indirectBufferStride) override;
    void            setUsePPGeometries                  (const bool usePPGeometries) override;
    void            setTryCachedMemory                  (const bool tryCachedMemory) override;
    VkBuildAccelerationStructureFlagsKHR getBuildFlags  () const override;

    void            getCreationSizes                    (const DeviceInterface& vk,
                                                         const VkDevice device,
                                                         const VkDeviceSize structureSize,
                                                         CreationSizes& sizes) override;
    void            create                              (const DeviceInterface& vk,
                                                         const VkDevice device,
                                                         Allocator& allocator,
                                                         VkDeviceSize structureSize,
                                                         VkDeviceAddress deviceAddress = 0u,
                                                         const void* pNext = DE_NULL,
                                                         const MemoryRequirement& addMemoryRequirement = MemoryRequirement::Any) override;
    void            build                               (const DeviceInterface& vk,
                                                         const VkDevice device,
                                                         const VkCommandBuffer cmdBuffer) override;
    void            copyFrom                            (const DeviceInterface& vk,
                                                         const VkDevice device,
                                                         const VkCommandBuffer cmdBuffer,
                                                         TopLevelAccelerationStructure* accelerationStructure,
                                                         bool compactCopy) override;
    void            serialize                           (const DeviceInterface& vk,
                                                         const VkDevice device,
                                                         const VkCommandBuffer cmdBuffer,
                                                         SerialStorage* storage) override;
    void            deserialize                         (const DeviceInterface& vk,
                                                         const VkDevice device,
                                                         const VkCommandBuffer cmdBuffer,
                                                         SerialStorage* storage) override;

    std::vector<VkDeviceSize> getSerializingSizes       (const DeviceInterface& vk,
                                                         const VkDevice device,
                                                         const VkQueue queue,
                                                         const deUint32 queueFamilyIndex) override;

    std::vector<deUint64> getSerializingAddresses       (const DeviceInterface& vk,
                                                         const VkDevice device) const override;

    const VkAccelerationStructureKHR* getPtr            (void) const override;

    void            updateInstanceMatrix                (const DeviceInterface& vk,
                                                         const VkDevice device,
                                                         size_t instanceIndex,
                                                         const VkTransformMatrixKHR& matrix) override;

protected:
    VkAccelerationStructureBuildTypeKHR     m_buildType;
    VkAccelerationStructureCreateFlagsKHR   m_createFlags;
    bool                                    m_createGeneric;
    VkBuildAccelerationStructureFlagsKHR    m_buildFlags;
    bool                                    m_buildWithoutPrimitives;
    bool                                    m_inactiveInstances;
    bool                                    m_deferredOperation;
    deUint32                                m_workerThreadCount;
    bool                                    m_useArrayOfPointers;
    de::MovePtr<BufferWithMemory>           m_accelerationStructureBuffer;
    de::MovePtr<BufferWithMemory>           m_instanceBuffer;
    de::MovePtr<BufferWithMemory>           m_instanceAddressBuffer;
    de::MovePtr<BufferWithMemory>           m_deviceScratchBuffer;
    std::vector<deUint8>                    m_hostScratchBuffer;
    Move<VkAccelerationStructureKHR>        m_accelerationStructureKHR;
    VkBuffer                                m_indirectBuffer;
    VkDeviceSize                            m_indirectBufferOffset;
    deUint32                                m_indirectBufferStride;
    bool                                    m_usePPGeometries;
    bool                                    m_tryCachedMemory;

    void            prepareInstances                    (const DeviceInterface& vk,
                                                         const VkDevice device,
                                                         VkAccelerationStructureGeometryKHR& accelerationStructureGeometryKHR,
                                                         std::vector<deUint32>& maxPrimitiveCounts);

    void            serializeBottoms                    (const DeviceInterface& vk,
                                                         const VkDevice device,
                                                         const VkCommandBuffer cmdBuffer,
                                                         SerialStorage* storage,
                                                         VkDeferredOperationKHR deferredOperation);

    void            createAndDeserializeBottoms         (const DeviceInterface& vk,
                                                         const VkDevice device,
                                                         const VkCommandBuffer cmdBuffer,
                                                         Allocator& allocator,
                                                         SerialStorage* storage) override;
};

deUint32 TopLevelAccelerationStructureKHR::getRequiredAllocationCount (void)
{
    /*
        de::MovePtr<BufferWithMemory> m_instanceBuffer;
        de::MovePtr<Allocation>       m_accelerationStructureAlloc;
        de::MovePtr<BufferWithMemory> m_deviceScratchBuffer;
    */
    return 3u;
}

TopLevelAccelerationStructureKHR::TopLevelAccelerationStructureKHR ()
    : TopLevelAccelerationStructure  ()
    , m_buildType                    (VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    , m_createFlags                  (0u)
    , m_createGeneric                (false)
    , m_buildFlags                   (0u)
    , m_buildWithoutPrimitives       (false)
    , m_inactiveInstances            (false)
    , m_deferredOperation            (false)
    , m_workerThreadCount            (0)
    , m_useArrayOfPointers           (false)
    , m_accelerationStructureBuffer  (DE_NULL)
    , m_instanceBuffer               (DE_NULL)
    , m_instanceAddressBuffer        (DE_NULL)
    , m_deviceScratchBuffer          (DE_NULL)
    , m_accelerationStructureKHR     ()
    , m_indirectBuffer               (DE_NULL)
    , m_indirectBufferOffset         (0)
    , m_indirectBufferStride         (0)
    , m_usePPGeometries              (false)
    , m_tryCachedMemory              (true)
{
}

TopLevelAccelerationStructureKHR::~TopLevelAccelerationStructureKHR ()
{
}

void TopLevelAccelerationStructureKHR::setBuildType (const VkAccelerationStructureBuildTypeKHR buildType)
{
    m_buildType = buildType;
}

void TopLevelAccelerationStructureKHR::setCreateFlags (const VkAccelerationStructureCreateFlagsKHR createFlags)
{
    m_createFlags = createFlags;
}

void TopLevelAccelerationStructureKHR::setCreateGeneric (bool createGeneric)
{
    m_createGeneric = createGeneric;
}

void TopLevelAccelerationStructureKHR::setInactiveInstances (bool inactiveInstances)
{
    m_inactiveInstances = inactiveInstances;
}

void TopLevelAccelerationStructureKHR::setBuildFlags (const VkBuildAccelerationStructureFlagsKHR buildFlags)
{
    m_buildFlags = buildFlags;
}

void TopLevelAccelerationStructureKHR::setBuildWithoutPrimitives (bool buildWithoutPrimitives)
{
    m_buildWithoutPrimitives = buildWithoutPrimitives;
}

void TopLevelAccelerationStructureKHR::setDeferredOperation (const bool deferredOperation,
                                                             const deUint32 workerThreadCount)
{
    m_deferredOperation = deferredOperation;
    m_workerThreadCount = workerThreadCount;
}

void TopLevelAccelerationStructureKHR::setUseArrayOfPointers (const bool useArrayOfPointers)
{
    m_useArrayOfPointers = useArrayOfPointers;
}

void TopLevelAccelerationStructureKHR::setUsePPGeometries (const bool usePPGeometries)
{
    m_usePPGeometries = usePPGeometries;
}

void TopLevelAccelerationStructureKHR::setTryCachedMemory (const bool tryCachedMemory)
{
    m_tryCachedMemory = tryCachedMemory;
}

void TopLevelAccelerationStructureKHR::setIndirectBuildParameters (const VkBuffer indirectBuffer,
                                                                   const VkDeviceSize indirectBufferOffset,
                                                                   const deUint32 indirectBufferStride)
{
    m_indirectBuffer       = indirectBuffer;
    m_indirectBufferOffset = indirectBufferOffset;
    m_indirectBufferStride = indirectBufferStride;
}

VkBuildAccelerationStructureFlagsKHR TopLevelAccelerationStructureKHR::getBuildFlags () const
{
    return m_buildFlags;
}

VkDeviceSize TopLevelAccelerationStructure::CreationSizes::sum () const
{
    return structure + updateScratch + buildScratch + instancePointers + instancesBuffer;
}

void TopLevelAccelerationStructureKHR::getCreationSizes (const DeviceInterface& vk,
                                                         const VkDevice device,
                                                         const VkDeviceSize structureSize,
                                                         CreationSizes& sizes)
{
    // An AS may be built from geometries using vkCmdBuildAccelerationStructuresKHR / vkBuildAccelerationStructuresKHR,
    // or it may be copied/compacted/deserialized from another AS (in which case it does not need geometries,
    // but it has to know its size before creation).
    DE_ASSERT(!m_bottomLevelInstances.empty() != !(structureSize == 0)); // logical xor

    if (structureSize == 0)
    {
        VkAccelerationStructureGeometryKHR accelerationStructureGeometryKHR;
        const auto                         accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
        std::vector<deUint32>              maxPrimitiveCounts;
        prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);

        VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR =
        {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,       // VkStructureType                                  sType;
            DE_NULL,                                                                // const void*                                      pNext;
            VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,                           // VkAccelerationStructureTypeKHR                   type;
            m_buildFlags,                                                           // VkBuildAccelerationStructureFlagsKHR             flags;
            VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,                         // VkBuildAccelerationStructureModeKHR              mode;
            DE_NULL,                                                                // VkAccelerationStructureKHR                       srcAccelerationStructure;
            DE_NULL,                                                                // VkAccelerationStructureKHR                       dstAccelerationStructure;
            1u,                                                                     // deUint32                                         geometryCount;
            (m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR),      // const VkAccelerationStructureGeometryKHR*        pGeometries;
            (m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr),   // const VkAccelerationStructureGeometryKHR* const* ppGeometries;
            makeDeviceOrHostAddressKHR(DE_NULL)                                     // VkDeviceOrHostAddressKHR                         scratchData;
        };

        VkAccelerationStructureBuildSizesInfoKHR sizeInfo =
        {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,  // VkStructureType  sType;
            DE_NULL,                                                        // const void*      pNext;
            0,                                                              // VkDeviceSize     accelerationStructureSize;
            0,                                                              // VkDeviceSize     updateScratchSize;
            0                                                               // VkDeviceSize     buildScratchSize;
        };

        vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);

        sizes.structure     = sizeInfo.accelerationStructureSize;
        sizes.updateScratch = sizeInfo.updateScratchSize;
        sizes.buildScratch  = sizeInfo.buildScratchSize;
    }
    else
    {
        sizes.structure     = structureSize;
        sizes.updateScratch = 0u;
        sizes.buildScratch  = 0u;
    }

    sizes.instancePointers = 0u;
    if (m_useArrayOfPointers)
    {
        const size_t pointerSize = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) ? sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress) : sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
        sizes.instancePointers = static_cast<VkDeviceSize>(m_bottomLevelInstances.size() * pointerSize);
    }

    sizes.instancesBuffer = m_bottomLevelInstances.empty() ? 0u : m_bottomLevelInstances.size() * sizeof(VkAccelerationStructureInstanceKHR);
}

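// Illustrative sketch (compile-guarded): using getCreationSizes() together with
// CreationSizes::sum() to estimate the total allocation footprint of a TLAS before
// creating it. `tlas`, `vkd` and `device` are hypothetical caller-side names.
#if 0
static VkDeviceSize exampleEstimateTopLevelMemory (TopLevelAccelerationStructure& tlas,
                                                   const DeviceInterface&         vkd,
                                                   const VkDevice                 device)
{
    TopLevelAccelerationStructure::CreationSizes sizes;
    tlas.getCreationSizes(vkd, device, 0ull /* derive sizes from the added instances */, sizes);
    return sizes.sum(); // structure + updateScratch + buildScratch + instancePointers + instancesBuffer
}
#endif
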
void TopLevelAccelerationStructureKHR::create (const DeviceInterface& vk,
                                               const VkDevice device,
                                               Allocator& allocator,
                                               VkDeviceSize structureSize,
                                               VkDeviceAddress deviceAddress,
                                               const void* pNext,
                                               const MemoryRequirement& addMemoryRequirement)
{
    // An AS may be built from geometries using vkCmdBuildAccelerationStructuresKHR / vkBuildAccelerationStructuresKHR,
    // or it may be copied/compacted/deserialized from another AS (in which case it does not need geometries,
    // but it has to know its size before creation).
    DE_ASSERT(!m_bottomLevelInstances.empty() != !(structureSize == 0)); // logical xor

    if (structureSize == 0)
    {
        VkAccelerationStructureGeometryKHR accelerationStructureGeometryKHR;
        const auto                         accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
        std::vector<deUint32>              maxPrimitiveCounts;
        prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);

        VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR =
        {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,       // VkStructureType                                  sType;
            DE_NULL,                                                                // const void*                                      pNext;
            VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,                           // VkAccelerationStructureTypeKHR                   type;
            m_buildFlags,                                                           // VkBuildAccelerationStructureFlagsKHR             flags;
            VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,                         // VkBuildAccelerationStructureModeKHR              mode;
            DE_NULL,                                                                // VkAccelerationStructureKHR                       srcAccelerationStructure;
            DE_NULL,                                                                // VkAccelerationStructureKHR                       dstAccelerationStructure;
            1u,                                                                     // deUint32                                         geometryCount;
            (m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR),      // const VkAccelerationStructureGeometryKHR*        pGeometries;
            (m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr),   // const VkAccelerationStructureGeometryKHR* const* ppGeometries;
            makeDeviceOrHostAddressKHR(DE_NULL)                                     // VkDeviceOrHostAddressKHR                         scratchData;
        };

        VkAccelerationStructureBuildSizesInfoKHR sizeInfo =
        {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,  // VkStructureType  sType;
            DE_NULL,                                                        // const void*      pNext;
            0,                                                              // VkDeviceSize     accelerationStructureSize;
            0,                                                              // VkDeviceSize     updateScratchSize;
            0                                                               // VkDeviceSize     buildScratchSize;
        };

        vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);

        m_structureSize     = sizeInfo.accelerationStructureSize;
        m_updateScratchSize = sizeInfo.updateScratchSize;
        m_buildScratchSize  = sizeInfo.buildScratchSize;
    }
    else
    {
        m_structureSize     = structureSize;
        m_updateScratchSize = 0u;
        m_buildScratchSize  = 0u;
    }

    {
        const VkBufferCreateInfo bufferCreateInfo  = makeBufferCreateInfo(m_structureSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
        const MemoryRequirement  memoryRequirement = addMemoryRequirement | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress;

        try
        {
            m_accelerationStructureBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | memoryRequirement));
        }
        catch (const tcu::NotSupportedError&)
        {
            // Retry without the Cached flag when cached host-visible memory is not available.
            m_accelerationStructureBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, memoryRequirement));
        }
    }

    {
        const VkAccelerationStructureTypeKHR structureType = (m_createGeneric
                                                              ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
                                                              : VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR);
        const VkAccelerationStructureCreateInfoKHR accelerationStructureCreateInfoKHR =
        {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,   // VkStructureType                       sType;
            pNext,                                                      // const void*                           pNext;
            m_createFlags,                                              // VkAccelerationStructureCreateFlagsKHR createFlags;
            m_accelerationStructureBuffer->get(),                       // VkBuffer                              buffer;
            0u,                                                         // VkDeviceSize                          offset;
            m_structureSize,                                            // VkDeviceSize                          size;
            structureType,                                              // VkAccelerationStructureTypeKHR        type;
            deviceAddress                                               // VkDeviceAddress                       deviceAddress;
        };

        m_accelerationStructureKHR = createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);
    }

    if (m_buildScratchSize > 0u)
    {
        if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
        {
            const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(m_buildScratchSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
            m_deviceScratchBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
        }
        else
        {
            m_hostScratchBuffer.resize(static_cast<size_t>(m_buildScratchSize));
        }
    }

    if (m_useArrayOfPointers)
    {
        const size_t             pointerSize      = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) ? sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress) : sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
        const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(static_cast<VkDeviceSize>(m_bottomLevelInstances.size() * pointerSize), VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
        m_instanceAddressBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
    }

    if (!m_bottomLevelInstances.empty())
        m_instanceBuffer = de::MovePtr<BufferWithMemory>(createInstanceBuffer(vk, device, allocator, m_bottomLevelInstances, m_instanceData, m_tryCachedMemory));
}

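// Illustrative sketch (compile-guarded): the two ways create() is intended to be called.
// `vkd`, `device`, `allocator` and `compactedSize` are hypothetical caller-side names.
#if 0
static void exampleCreateModes (TopLevelAccelerationStructure& tlasFromInstances,
                                TopLevelAccelerationStructure& tlasCopyTarget,
                                const DeviceInterface& vkd, VkDevice device,
                                Allocator& allocator, VkDeviceSize compactedSize)
{
    // 1) structureSize == 0: sizes are derived from the instances added beforehand.
    tlasFromInstances.create(vkd, device, allocator, 0ull);

    // 2) Explicit size, e.g. as the destination of a compacting copy or a deserialization;
    //    no instances are needed in this case.
    tlasCopyTarget.create(vkd, device, allocator, compactedSize);
}
#endif
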
void TopLevelAccelerationStructureKHR::updateInstanceMatrix (const DeviceInterface& vk, const VkDevice device, size_t instanceIndex, const VkTransformMatrixKHR& matrix)
{
    DE_ASSERT(m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR);
    DE_ASSERT(instanceIndex < m_bottomLevelInstances.size());
    DE_ASSERT(instanceIndex < m_instanceData.size());

    const auto&  blas           = *m_bottomLevelInstances[instanceIndex];
    auto&        instanceData   = m_instanceData[instanceIndex];
    auto&        instancesAlloc = m_instanceBuffer->getAllocation();
    auto         bufferStart    = reinterpret_cast<deUint8*>(instancesAlloc.getHostPtr());
    VkDeviceSize bufferOffset   = sizeof(VkAccelerationStructureInstanceKHR) * instanceIndex;

    instanceData.matrix = matrix;
    updateSingleInstance(vk, device, blas, instanceData, bufferStart + bufferOffset, m_buildType, m_inactiveInstances);
    flushMappedMemoryRange(vk, device, instancesAlloc.getMemory(), instancesAlloc.getOffset(), VK_WHOLE_SIZE);
}

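// Illustrative sketch (compile-guarded): repositioning one instance between frames. The
// matrix change only takes effect once the TLAS is rebuilt; `cmdBuffer` is hypothetical.
#if 0
static void exampleMoveInstance (TopLevelAccelerationStructure& tlas, const DeviceInterface& vkd,
                                 VkDevice device, VkCommandBuffer cmdBuffer, const VkTransformMatrixKHR& newPose)
{
    tlas.updateInstanceMatrix(vkd, device, 0 /* instanceIndex */, newPose);
    tlas.build(vkd, device, cmdBuffer); // instance records are re-read when the build is recorded
}
#endif
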
void TopLevelAccelerationStructureKHR::build (const DeviceInterface& vk,
                                              const VkDevice device,
                                              const VkCommandBuffer cmdBuffer)
{
    DE_ASSERT(!m_bottomLevelInstances.empty());
    DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
    DE_ASSERT(m_buildScratchSize != 0);

    updateInstanceBuffer(vk, device, m_bottomLevelInstances, m_instanceData, m_instanceBuffer.get(), m_buildType, m_inactiveInstances);

    VkAccelerationStructureGeometryKHR accelerationStructureGeometryKHR;
    const auto                         accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
    std::vector<deUint32>              maxPrimitiveCounts;
    prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);

    VkDeviceOrHostAddressKHR scratchData = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
                                           ? makeDeviceOrHostAddressKHR(vk, device, m_deviceScratchBuffer->get(), 0)
                                           : makeDeviceOrHostAddressKHR(m_hostScratchBuffer.data());

    VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR =
    {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,       // VkStructureType                                  sType;
        DE_NULL,                                                                // const void*                                      pNext;
        VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,                           // VkAccelerationStructureTypeKHR                   type;
        m_buildFlags,                                                           // VkBuildAccelerationStructureFlagsKHR             flags;
        VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,                         // VkBuildAccelerationStructureModeKHR              mode;
        DE_NULL,                                                                // VkAccelerationStructureKHR                       srcAccelerationStructure;
        m_accelerationStructureKHR.get(),                                       // VkAccelerationStructureKHR                       dstAccelerationStructure;
        1u,                                                                     // deUint32                                         geometryCount;
        (m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR),      // const VkAccelerationStructureGeometryKHR*        pGeometries;
        (m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr),   // const VkAccelerationStructureGeometryKHR* const* ppGeometries;
        scratchData                                                             // VkDeviceOrHostAddressKHR                         scratchData;
    };

    const deUint32 primitiveCount = (m_buildWithoutPrimitives ? 0u : static_cast<deUint32>(m_bottomLevelInstances.size()));

    VkAccelerationStructureBuildRangeInfoKHR accelerationStructureBuildRangeInfoKHR =
    {
        primitiveCount, // deUint32 primitiveCount;
        0,              // deUint32 primitiveOffset;
        0,              // deUint32 firstVertex;
        0               // deUint32 transformOffset;
    };
    VkAccelerationStructureBuildRangeInfoKHR* accelerationStructureBuildRangeInfoKHRPtr = &accelerationStructureBuildRangeInfoKHR;

    if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        if (m_indirectBuffer == DE_NULL)
            vk.cmdBuildAccelerationStructuresKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
        else
        {
            VkDeviceAddress indirectDeviceAddress = getBufferDeviceAddress(vk, device, m_indirectBuffer, m_indirectBufferOffset);
            deUint32*       pMaxPrimitiveCounts   = maxPrimitiveCounts.data();
            vk.cmdBuildAccelerationStructuresIndirectKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, &indirectDeviceAddress, &m_indirectBufferStride, &pMaxPrimitiveCounts);
        }
    }
    else if (!m_deferredOperation)
    {
        VK_CHECK(vk.buildAccelerationStructuresKHR(device, DE_NULL, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr));
    }
    else
    {
        const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
        const auto deferredOperation    = deferredOperationPtr.get();

        VkResult result = vk.buildAccelerationStructuresKHR(device, deferredOperation, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);

        DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);

        finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);

        accelerationStructureBuildGeometryInfoKHR.pNext = DE_NULL;
    }

    if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        const VkAccessFlags   accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
        const VkMemoryBarrier memBarrier  = makeMemoryBarrier(accessMasks, accessMasks);

        cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
    }
}

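// Illustrative sketch (compile-guarded): the typical device-build flow for a TLAS. It
// assumes the setInstanceCount()/addInstance() helpers declared for the base class in
// vkRayTracingUtil.hpp, and a `blas` that has already been created and built.
#if 0
static void exampleBuildTopLevel (const DeviceInterface& vkd, VkDevice device, Allocator& allocator,
                                  VkCommandBuffer cmdBuffer, de::SharedPtr<BottomLevelAccelerationStructure> blas)
{
    de::MovePtr<TopLevelAccelerationStructure> tlas = makeTopLevelAccelerationStructure();
    tlas->setInstanceCount(1);
    tlas->addInstance(blas);
    tlas->create(vkd, device, allocator, 0ull);
    tlas->build(vkd, device, cmdBuffer); // records the build plus the trailing memory barrier
}
#endif
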
void TopLevelAccelerationStructureKHR::copyFrom (const DeviceInterface& vk,
                                                 const VkDevice device,
                                                 const VkCommandBuffer cmdBuffer,
                                                 TopLevelAccelerationStructure* accelerationStructure,
                                                 bool compactCopy)
{
    DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
    DE_ASSERT(accelerationStructure != DE_NULL);

    VkCopyAccelerationStructureInfoKHR copyAccelerationStructureInfo =
    {
        VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR,                                                 // VkStructureType                    sType;
        DE_NULL,                                                                                                // const void*                        pNext;
        *(accelerationStructure->getPtr()),                                                                     // VkAccelerationStructureKHR         src;
        *(getPtr()),                                                                                            // VkAccelerationStructureKHR         dst;
        compactCopy ? VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR : VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR   // VkCopyAccelerationStructureModeKHR mode;
    };

    if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        vk.cmdCopyAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
    }
    else if (!m_deferredOperation)
    {
        VK_CHECK(vk.copyAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
    }
    else
    {
        const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
        const auto deferredOperation    = deferredOperationPtr.get();

        VkResult result = vk.copyAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);

        DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);

        finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
    }

    if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        const VkAccessFlags   accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
        const VkMemoryBarrier memBarrier  = makeMemoryBarrier(accessMasks, accessMasks);

        cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
    }
}

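// Illustrative sketch (compile-guarded): a compacting copy. The source must have been built
// with VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR, and `compactedSize` is
// assumed to come from queryAccelerationStructureSize() (defined further below) after the
// query command buffer has executed. All names are hypothetical.
#if 0
static void exampleCompactingCopy (TopLevelAccelerationStructure& src, TopLevelAccelerationStructure& dst,
                                   const DeviceInterface& vkd, VkDevice device, Allocator& allocator,
                                   VkCommandBuffer cmdBuffer, VkDeviceSize compactedSize)
{
    dst.create(vkd, device, allocator, compactedSize); // explicit size, no instances needed
    dst.copyFrom(vkd, device, cmdBuffer, &src, true /* compactCopy */);
}
#endif
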
void TopLevelAccelerationStructureKHR::serialize (const DeviceInterface& vk,
                                                  const VkDevice device,
                                                  const VkCommandBuffer cmdBuffer,
                                                  SerialStorage* storage)
{
    DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
    DE_ASSERT(storage != DE_NULL);

    const VkCopyAccelerationStructureToMemoryInfoKHR copyAccelerationStructureInfo =
    {
        VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_INFO_KHR,   // VkStructureType                    sType;
        DE_NULL,                                                            // const void*                        pNext;
        *(getPtr()),                                                        // VkAccelerationStructureKHR         src;
        storage->getAddress(vk, device, m_buildType),                       // VkDeviceOrHostAddressKHR           dst;
        VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR                   // VkCopyAccelerationStructureModeKHR mode;
    };

    if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        vk.cmdCopyAccelerationStructureToMemoryKHR(cmdBuffer, &copyAccelerationStructureInfo);
        if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, DE_NULL);
    }
    else if (!m_deferredOperation)
    {
        VK_CHECK(vk.copyAccelerationStructureToMemoryKHR(device, DE_NULL, &copyAccelerationStructureInfo));
        if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, DE_NULL);
    }
    else
    {
        const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
        const auto deferredOperation    = deferredOperationPtr.get();

        const VkResult result = vk.copyAccelerationStructureToMemoryKHR(device, deferredOperation, &copyAccelerationStructureInfo);

        DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
        if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, deferredOperation);

        finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
    }
}

void TopLevelAccelerationStructureKHR::deserialize (const DeviceInterface& vk,
                                                    const VkDevice device,
                                                    const VkCommandBuffer cmdBuffer,
                                                    SerialStorage* storage)
{
    DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
    DE_ASSERT(storage != DE_NULL);

    const VkCopyMemoryToAccelerationStructureInfoKHR copyAccelerationStructureInfo =
    {
        VK_STRUCTURE_TYPE_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_INFO_KHR,   // VkStructureType                    sType;
        DE_NULL,                                                            // const void*                        pNext;
        storage->getAddressConst(vk, device, m_buildType),                  // VkDeviceOrHostAddressConstKHR      src;
        *(getPtr()),                                                        // VkAccelerationStructureKHR         dst;
        VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR                 // VkCopyAccelerationStructureModeKHR mode;
    };

    if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        vk.cmdCopyMemoryToAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
    }
    else if (!m_deferredOperation)
    {
        VK_CHECK(vk.copyMemoryToAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
    }
    else
    {
        const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
        const auto deferredOperation    = deferredOperationPtr.get();

        const VkResult result = vk.copyMemoryToAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);

        DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);

        finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
    }

    if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        const VkAccessFlags   accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
        const VkMemoryBarrier memBarrier  = makeMemoryBarrier(accessMasks, accessMasks);

        cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
    }
}

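// Illustrative sketch (compile-guarded): a serialize/deserialize round trip. The exact
// SerialStorage setup is assumed here; getSerializingSizes() below provides the required
// storage size, and the command buffers must be submitted and completed between the steps.
#if 0
static void exampleRoundTrip (TopLevelAccelerationStructure& src, const DeviceInterface& vkd,
                              VkDevice device, Allocator& allocator, VkCommandBuffer cmdBuffer,
                              SerialStorage* storage, VkDeviceSize storageSize)
{
    src.serialize(vkd, device, cmdBuffer, storage);
    // ... submit cmdBuffer and wait for it to finish ...
    de::MovePtr<TopLevelAccelerationStructure> dst = makeTopLevelAccelerationStructure();
    dst->create(vkd, device, allocator, storageSize); // created with the serialized size
    dst->deserialize(vkd, device, cmdBuffer, storage);
}
#endif
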
void TopLevelAccelerationStructureKHR::serializeBottoms (const DeviceInterface& vk,
                                                         const VkDevice device,
                                                         const VkCommandBuffer cmdBuffer,
                                                         SerialStorage* storage,
                                                         VkDeferredOperationKHR deferredOperation)
{
    DE_UNREF(deferredOperation);
    DE_ASSERT(storage->hasDeepFormat());

    const std::vector<deUint64>& addresses = storage->getSerialInfo().addresses();
    const std::size_t            cbottoms  = m_bottomLevelInstances.size();

    deUint32              storageIndex = 0;
    std::vector<deUint64> matches;

    for (std::size_t i = 0; i < cbottoms; ++i)
    {
        const deUint64& lookAddr = addresses[i+1];
        auto            end      = matches.end();
        auto            match    = std::find_if(matches.begin(), end, [&](const deUint64& item){ return item == lookAddr; });
        if (match == end)
        {
            matches.emplace_back(lookAddr);
            m_bottomLevelInstances[i].get()->serialize(vk, device, cmdBuffer, storage->getBottomStorage(storageIndex).get());
            storageIndex += 1;
        }
    }
}

void TopLevelAccelerationStructureKHR::createAndDeserializeBottoms (const DeviceInterface& vk,
                                                                    const VkDevice device,
                                                                    const VkCommandBuffer cmdBuffer,
                                                                    Allocator& allocator,
                                                                    SerialStorage* storage)
{
    DE_ASSERT(storage->hasDeepFormat());
    DE_ASSERT(m_bottomLevelInstances.size() == 0);

    const std::vector<deUint64>&                  addresses    = storage->getSerialInfo().addresses();
    const std::size_t                             cbottoms     = addresses.size() - 1;
    deUint32                                      storageIndex = 0;
    std::vector<std::pair<deUint64, std::size_t>> matches;

    for (std::size_t i = 0; i < cbottoms; ++i)
    {
        const deUint64& lookAddr = addresses[i+1];
        auto            end      = matches.end();
        auto            match    = std::find_if(matches.begin(), end, [&](const std::pair<deUint64, std::size_t>& item){ return item.first == lookAddr; });
        if (match != end)
        {
            m_bottomLevelInstances.emplace_back(m_bottomLevelInstances[match->second]);
        }
        else
        {
            de::MovePtr<BottomLevelAccelerationStructure> blas = makeBottomLevelAccelerationStructure();
            blas->createAndDeserializeFrom(vk, device, cmdBuffer, allocator, storage->getBottomStorage(storageIndex).get());
            m_bottomLevelInstances.emplace_back(de::SharedPtr<BottomLevelAccelerationStructure>(blas.release()));
            matches.emplace_back(lookAddr, i);
            storageIndex += 1;
        }
    }

    std::vector<deUint64> newAddresses = getSerializingAddresses(vk, device);
    DE_ASSERT(addresses.size() == newAddresses.size());

    SerialStorage::AccelerationStructureHeader* header = storage->getASHeader();
    DE_ASSERT(cbottoms == header->handleCount);

    // Finally, update the bottom-level AS addresses before top-level AS deserialization.
    for (std::size_t i = 0; i < cbottoms; ++i)
    {
        header->handleArray[i] = newAddresses[i+1];
    }
}

std::vector<VkDeviceSize> TopLevelAccelerationStructureKHR::getSerializingSizes (const DeviceInterface& vk,
                                                                                 const VkDevice device,
                                                                                 const VkQueue queue,
                                                                                 const deUint32 queueFamilyIndex)
{
    const deUint32                          queryCount (deUint32(m_bottomLevelInstances.size()) + 1);
    std::vector<VkAccelerationStructureKHR> handles    (queryCount);
    std::vector<VkDeviceSize>               sizes      (queryCount);

    handles[0] = m_accelerationStructureKHR.get();

    for (deUint32 h = 1; h < queryCount; ++h)
        handles[h] = *m_bottomLevelInstances[h-1].get()->getPtr();

    if (VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR == m_buildType)
        queryAccelerationStructureSize(vk, device, DE_NULL, handles, m_buildType, DE_NULL, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, sizes);
    else
    {
        const Move<VkCommandPool>   cmdPool   = createCommandPool(vk, device, 0, queueFamilyIndex);
        const Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
        const Move<VkQueryPool>     queryPool = makeQueryPool(vk, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, queryCount);

        beginCommandBuffer(vk, *cmdBuffer);
        queryAccelerationStructureSize(vk, device, *cmdBuffer, handles, m_buildType, *queryPool, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, sizes);
        endCommandBuffer(vk, *cmdBuffer);
        submitCommandsAndWait(vk, device, queue, cmdBuffer.get());

        VK_CHECK(vk.getQueryPoolResults(device, *queryPool, 0u, queryCount, queryCount * sizeof(VkDeviceSize), sizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
    }

    return sizes;
}

std::vector<deUint64> TopLevelAccelerationStructureKHR::getSerializingAddresses (const DeviceInterface& vk, const VkDevice device) const
{
    std::vector<deUint64> result(m_bottomLevelInstances.size() + 1);

    VkAccelerationStructureDeviceAddressInfoKHR asDeviceAddressInfo =
    {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,   // VkStructureType            sType;
        DE_NULL,                                                            // const void*                pNext;
        DE_NULL                                                             // VkAccelerationStructureKHR accelerationStructure;
    };

    if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        asDeviceAddressInfo.accelerationStructure = m_accelerationStructureKHR.get();
        result[0] = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
    }
    else
    {
        result[0] = deUint64(getPtr()->getInternal());
    }

    for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
    {
        const BottomLevelAccelerationStructure& bottomLevelAccelerationStructure = *m_bottomLevelInstances[instanceNdx];
        const VkAccelerationStructureKHR        accelerationStructureKHR         = *bottomLevelAccelerationStructure.getPtr();

        if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
        {
            asDeviceAddressInfo.accelerationStructure = accelerationStructureKHR;
            result[instanceNdx+1] = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
        }
        else
        {
            result[instanceNdx+1] = deUint64(accelerationStructureKHR.getInternal());
        }
    }

    return result;
}

const VkAccelerationStructureKHR* TopLevelAccelerationStructureKHR::getPtr (void) const
{
    return &m_accelerationStructureKHR.get();
}

void TopLevelAccelerationStructureKHR::prepareInstances (const DeviceInterface& vk,
                                                         const VkDevice device,
                                                         VkAccelerationStructureGeometryKHR& accelerationStructureGeometryKHR,
                                                         std::vector<deUint32>& maxPrimitiveCounts)
{
    maxPrimitiveCounts.resize(1);
    maxPrimitiveCounts[0] = static_cast<deUint32>(m_bottomLevelInstances.size());

    VkDeviceOrHostAddressConstKHR instancesData;
    if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        if (m_instanceBuffer.get() != DE_NULL)
        {
            if (m_useArrayOfPointers)
            {
                deUint8*                      bufferStart   = static_cast<deUint8*>(m_instanceAddressBuffer->getAllocation().getHostPtr());
                VkDeviceSize                  bufferOffset  = 0;
                VkDeviceOrHostAddressConstKHR firstInstance = makeDeviceOrHostAddressConstKHR(vk, device, m_instanceBuffer->get(), 0);
                for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
                {
                    VkDeviceOrHostAddressConstKHR currentInstance;
                    currentInstance.deviceAddress = firstInstance.deviceAddress + instanceNdx * sizeof(VkAccelerationStructureInstanceKHR);

                    deMemcpy(&bufferStart[bufferOffset], &currentInstance, sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress));
                    bufferOffset += sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress);
                }
                flushMappedMemoryRange(vk, device, m_instanceAddressBuffer->getAllocation().getMemory(), m_instanceAddressBuffer->getAllocation().getOffset(), VK_WHOLE_SIZE);

                instancesData = makeDeviceOrHostAddressConstKHR(vk, device, m_instanceAddressBuffer->get(), 0);
            }
            else
                instancesData = makeDeviceOrHostAddressConstKHR(vk, device, m_instanceBuffer->get(), 0);
        }
        else
            instancesData = makeDeviceOrHostAddressConstKHR(DE_NULL);
    }
    else
    {
        if (m_instanceBuffer.get() != DE_NULL)
        {
            if (m_useArrayOfPointers)
            {
                deUint8*     bufferStart  = static_cast<deUint8*>(m_instanceAddressBuffer->getAllocation().getHostPtr());
                VkDeviceSize bufferOffset = 0;
                for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
                {
                    VkDeviceOrHostAddressConstKHR currentInstance;
                    currentInstance.hostAddress = (deUint8*)m_instanceBuffer->getAllocation().getHostPtr() + instanceNdx * sizeof(VkAccelerationStructureInstanceKHR);

                    deMemcpy(&bufferStart[bufferOffset], &currentInstance, sizeof(VkDeviceOrHostAddressConstKHR::hostAddress));
                    bufferOffset += sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
                }
                instancesData = makeDeviceOrHostAddressConstKHR(m_instanceAddressBuffer->getAllocation().getHostPtr());
            }
            else
                instancesData = makeDeviceOrHostAddressConstKHR(m_instanceBuffer->getAllocation().getHostPtr());
        }
        else
            instancesData = makeDeviceOrHostAddressConstKHR(DE_NULL);
    }

    VkAccelerationStructureGeometryInstancesDataKHR accelerationStructureGeometryInstancesDataKHR =
    {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR,   // VkStructureType               sType;
        DE_NULL,                                                                // const void*                   pNext;
        (VkBool32)(m_useArrayOfPointers ? DE_TRUE : DE_FALSE),                  // VkBool32                      arrayOfPointers;
        instancesData                                                           // VkDeviceOrHostAddressConstKHR data;
    };

    accelerationStructureGeometryKHR =
    {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,                                      // VkStructureType                        sType;
        DE_NULL,                                                                                    // const void*                            pNext;
        VK_GEOMETRY_TYPE_INSTANCES_KHR,                                                             // VkGeometryTypeKHR                      geometryType;
        makeVkAccelerationStructureInstancesDataKHR(accelerationStructureGeometryInstancesDataKHR),// VkAccelerationStructureGeometryDataKHR geometry;
        (VkGeometryFlagsKHR)0u                                                                      // VkGeometryFlagsKHR                     flags;
    };
}

deUint32 TopLevelAccelerationStructure::getRequiredAllocationCount (void)
{
    return TopLevelAccelerationStructureKHR::getRequiredAllocationCount();
}

de::MovePtr<TopLevelAccelerationStructure> makeTopLevelAccelerationStructure ()
{
    return de::MovePtr<TopLevelAccelerationStructure>(new TopLevelAccelerationStructureKHR);
}

bool queryAccelerationStructureSizeKHR (const DeviceInterface& vk,
                                        const VkDevice device,
                                        const VkCommandBuffer cmdBuffer,
                                        const std::vector<VkAccelerationStructureKHR>& accelerationStructureHandles,
                                        VkAccelerationStructureBuildTypeKHR buildType,
                                        const VkQueryPool queryPool,
                                        VkQueryType queryType,
                                        deUint32 firstQuery,
                                        std::vector<VkDeviceSize>& results)
{
    DE_ASSERT(queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR || queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR);

    if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
    {
        // queryPool must be large enough to contain at least (firstQuery + accelerationStructureHandles.size()) queries
        vk.cmdResetQueryPool(cmdBuffer, queryPool, firstQuery, deUint32(accelerationStructureHandles.size()));
        vk.cmdWriteAccelerationStructuresPropertiesKHR(cmdBuffer, deUint32(accelerationStructureHandles.size()), accelerationStructureHandles.data(), queryType, queryPool, firstQuery);
        // Results cannot be retrieved on the CPU at this point - they must be fetched with getQueryPoolResults
        // after the command buffer has executed. Meanwhile the function fills the output with zeros and returns false.
        results.resize(accelerationStructureHandles.size(), 0u);
        return false;
    }

    // buildType != VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR
    results.resize(accelerationStructureHandles.size(), 0u);
    vk.writeAccelerationStructuresPropertiesKHR(device, deUint32(accelerationStructureHandles.size()), accelerationStructureHandles.data(), queryType,
                                                sizeof(VkDeviceSize) * accelerationStructureHandles.size(), results.data(), sizeof(VkDeviceSize));
    // results now contain the final values
    return true;
}

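// Illustrative sketch (compile-guarded) of the two paths above: host builds return final
// values immediately, while device builds only record commands, so the values have to be
// read back once the command buffer has executed. Names are hypothetical.
#if 0
static void exampleQuerySizes (const DeviceInterface& vkd, VkDevice device, VkCommandBuffer cmdBuffer,
                               const std::vector<VkAccelerationStructureKHR>& handles,
                               VkAccelerationStructureBuildTypeKHR buildType, VkQueryPool queryPool)
{
    std::vector<VkDeviceSize> sizes;
    const bool ready = queryAccelerationStructureSize(vkd, device, cmdBuffer, handles, buildType, queryPool,
                                                      VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, sizes);
    if (!ready)
    {
        // Device path: submit cmdBuffer first, then fetch the values.
        VK_CHECK(vkd.getQueryPoolResults(device, queryPool, 0u, de::sizeU32(sizes),
                                         sizes.size() * sizeof(VkDeviceSize), sizes.data(),
                                         sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
    }
}
#endif
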
bool queryAccelerationStructureSize (const DeviceInterface& vk,
                                     const VkDevice device,
                                     const VkCommandBuffer cmdBuffer,
                                     const std::vector<VkAccelerationStructureKHR>& accelerationStructureHandles,
                                     VkAccelerationStructureBuildTypeKHR buildType,
                                     const VkQueryPool queryPool,
                                     VkQueryType queryType,
                                     deUint32 firstQuery,
                                     std::vector<VkDeviceSize>& results)
{
    return queryAccelerationStructureSizeKHR(vk, device, cmdBuffer, accelerationStructureHandles, buildType, queryPool, queryType, firstQuery, results);
}

RayTracingPipeline::RayTracingPipeline ()
    : m_shadersModules           ()
    , m_pipelineLibraries        ()
    , m_shaderCreateInfos        ()
    , m_shadersGroupCreateInfos  ()
    , m_pipelineCreateFlags      (0U)
    , m_maxRecursionDepth        (1U)
    , m_maxPayloadSize           (0U)
    , m_maxAttributeSize         (0U)
    , m_deferredOperation        (false)
    , m_workerThreadCount        (0)
{
}

RayTracingPipeline::~RayTracingPipeline ()
{
}

#define CHECKED_ASSIGN_SHADER(SHADER, STAGE) \
    if (SHADER == VK_SHADER_UNUSED_KHR) \
        SHADER = STAGE; \
    else \
        TCU_THROW(InternalError, "Attempt to reassign shader")

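// For example, CHECKED_ASSIGN_SHADER(info.generalShader, 2) expands to
//
//     if (info.generalShader == VK_SHADER_UNUSED_KHR)
//         info.generalShader = 2;
//     else
//         TCU_THROW(InternalError, "Attempt to reassign shader");
//
// so each slot of a shader group can be assigned exactly once.
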
void RayTracingPipeline::addShader (VkShaderStageFlagBits shaderStage,
                                    Move<VkShaderModule> shaderModule,
                                    deUint32 group,
                                    const VkSpecializationInfo* specializationInfo,
                                    const VkPipelineShaderStageCreateFlags pipelineShaderStageCreateFlags,
                                    const void* pipelineShaderStageCreateInfopNext)
{
    addShader(shaderStage, makeVkSharedPtr(shaderModule), group, specializationInfo, pipelineShaderStageCreateFlags, pipelineShaderStageCreateInfopNext);
}

void RayTracingPipeline::addShader (VkShaderStageFlagBits shaderStage,
                                    de::SharedPtr<Move<VkShaderModule>> shaderModule,
                                    deUint32 group,
                                    const VkSpecializationInfo* specializationInfoPtr,
                                    const VkPipelineShaderStageCreateFlags pipelineShaderStageCreateFlags,
                                    const void* pipelineShaderStageCreateInfopNext)
{
    addShader(shaderStage, **shaderModule, group, specializationInfoPtr, pipelineShaderStageCreateFlags, pipelineShaderStageCreateInfopNext);
    m_shadersModules.push_back(shaderModule);
}

void RayTracingPipeline::addShader (VkShaderStageFlagBits shaderStage,
                                    VkShaderModule shaderModule,
                                    deUint32 group,
                                    const VkSpecializationInfo* specializationInfoPtr,
                                    const VkPipelineShaderStageCreateFlags pipelineShaderStageCreateFlags,
                                    const void* pipelineShaderStageCreateInfopNext)
{
    if (group >= m_shadersGroupCreateInfos.size())
    {
        for (size_t groupNdx = m_shadersGroupCreateInfos.size(); groupNdx <= group; ++groupNdx)
        {
            VkRayTracingShaderGroupCreateInfoKHR shaderGroupCreateInfo =
            {
                VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, // VkStructureType                sType;
                DE_NULL,                                                    // const void*                    pNext;
                VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR,              // VkRayTracingShaderGroupTypeKHR type;
                VK_SHADER_UNUSED_KHR,                                       // deUint32                       generalShader;
                VK_SHADER_UNUSED_KHR,                                       // deUint32                       closestHitShader;
                VK_SHADER_UNUSED_KHR,                                       // deUint32                       anyHitShader;
                VK_SHADER_UNUSED_KHR,                                       // deUint32                       intersectionShader;
                DE_NULL,                                                    // const void*                    pShaderGroupCaptureReplayHandle;
            };

            m_shadersGroupCreateInfos.push_back(shaderGroupCreateInfo);
        }
    }

    const deUint32                        shaderStageNdx        = (deUint32)m_shaderCreateInfos.size();
    VkRayTracingShaderGroupCreateInfoKHR& shaderGroupCreateInfo = m_shadersGroupCreateInfos[group];

    switch (shaderStage)
    {
        case VK_SHADER_STAGE_RAYGEN_BIT_KHR:       CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader,      shaderStageNdx); break;
        case VK_SHADER_STAGE_MISS_BIT_KHR:         CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader,      shaderStageNdx); break;
        case VK_SHADER_STAGE_CALLABLE_BIT_KHR:     CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader,      shaderStageNdx); break;
        case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:      CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.anyHitShader,       shaderStageNdx); break;
        case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:  CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.closestHitShader,   shaderStageNdx); break;
        case VK_SHADER_STAGE_INTERSECTION_BIT_KHR: CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.intersectionShader, shaderStageNdx); break;
        default:                                   TCU_THROW(InternalError, "Unacceptable stage");
    }

    switch (shaderStage)
    {
        case VK_SHADER_STAGE_RAYGEN_BIT_KHR:
        case VK_SHADER_STAGE_MISS_BIT_KHR:
        case VK_SHADER_STAGE_CALLABLE_BIT_KHR:
        {
            DE_ASSERT(shaderGroupCreateInfo.type == VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR);
            shaderGroupCreateInfo.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR;

            break;
        }

        case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:
        case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:
        case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:
        {
            DE_ASSERT(shaderGroupCreateInfo.type != VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR);
            shaderGroupCreateInfo.type = (shaderGroupCreateInfo.intersectionShader == VK_SHADER_UNUSED_KHR)
                                         ? VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR
                                         : VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR;

            break;
        }

        default: TCU_THROW(InternalError, "Unacceptable stage");
    }

    {
        const VkPipelineShaderStageCreateInfo shaderCreateInfo =
        {
            VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,    // VkStructureType                  sType;
            pipelineShaderStageCreateInfopNext,                     // const void*                      pNext;
            pipelineShaderStageCreateFlags,                         // VkPipelineShaderStageCreateFlags flags;
            shaderStage,                                            // VkShaderStageFlagBits            stage;
            shaderModule,                                           // VkShaderModule                   module;
            "main",                                                 // const char*                      pName;
            specializationInfoPtr,                                  // const VkSpecializationInfo*      pSpecializationInfo;
        };

        m_shaderCreateInfos.push_back(shaderCreateInfo);
    }
}

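// Illustrative sketch (compile-guarded): assembling a classic three-group pipeline, with
// the ray-gen shader in group 0, the miss shader in group 1 and a triangle hit group in
// group 2. It assumes the header declares default arguments for the trailing addShader()
// parameters; the binary names ("rgen" etc.) are hypothetical.
#if 0
static void exampleAddShaders (RayTracingPipeline& rtPipeline, const DeviceInterface& vkd,
                               VkDevice device, const BinaryCollection& binaries)
{
    rtPipeline.addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,      createShaderModule(vkd, device, binaries.get("rgen"), 0), 0);
    rtPipeline.addShader(VK_SHADER_STAGE_MISS_BIT_KHR,        createShaderModule(vkd, device, binaries.get("miss"), 0), 1);
    rtPipeline.addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, createShaderModule(vkd, device, binaries.get("chit"), 0), 2);
}
#endif
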
void RayTracingPipeline::addLibrary (de::SharedPtr<de::MovePtr<RayTracingPipeline>> pipelineLibrary)
{
    m_pipelineLibraries.push_back(pipelineLibrary);
}

Move<VkPipeline> RayTracingPipeline::createPipelineKHR (const DeviceInterface& vk,
                                                        const VkDevice device,
                                                        const VkPipelineLayout pipelineLayout,
                                                        const std::vector<VkPipeline>& pipelineLibraries,
                                                        const VkPipelineCache pipelineCache)
{
    for (size_t groupNdx = 0; groupNdx < m_shadersGroupCreateInfos.size(); ++groupNdx)
        DE_ASSERT(m_shadersGroupCreateInfos[groupNdx].sType == VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR);

    VkPipelineLibraryCreateInfoKHR librariesCreateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR, // VkStructureType   sType;
        DE_NULL,                                            // const void*       pNext;
        de::sizeU32(pipelineLibraries),                     // deUint32          libraryCount;
        de::dataOrNull(pipelineLibraries)                   // const VkPipeline* pLibraries;
    };
    const VkRayTracingPipelineInterfaceCreateInfoKHR pipelineInterfaceCreateInfo =
    {
        VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_INTERFACE_CREATE_INFO_KHR,   // VkStructureType sType;
        DE_NULL,                                                            // const void*     pNext;
        m_maxPayloadSize,                                                   // deUint32        maxPipelineRayPayloadSize;
        m_maxAttributeSize                                                  // deUint32        maxPipelineRayHitAttributeSize;
    };
    const bool                                        addPipelineInterfaceCreateInfo = m_maxPayloadSize != 0 || m_maxAttributeSize != 0;
    const VkRayTracingPipelineInterfaceCreateInfoKHR* pipelineInterfaceCreateInfoPtr = addPipelineInterfaceCreateInfo ? &pipelineInterfaceCreateInfo : DE_NULL;
    const VkPipelineLibraryCreateInfoKHR*             librariesCreateInfoPtr         = (pipelineLibraries.empty() ? nullptr : &librariesCreateInfo);

    Move<VkDeferredOperationKHR> deferredOperation;
    if (m_deferredOperation)
        deferredOperation = createDeferredOperationKHR(vk, device);

    VkPipelineDynamicStateCreateInfo dynamicStateCreateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,   // VkStructureType                   sType;
        DE_NULL,                                                // const void*                       pNext;
        0,                                                      // VkPipelineDynamicStateCreateFlags flags;
        static_cast<deUint32>(m_dynamicStates.size()),          // deUint32                          dynamicStateCount;
        m_dynamicStates.data(),                                 // const VkDynamicState*             pDynamicStates;
    };

    const VkRayTracingPipelineCreateInfoKHR pipelineCreateInfo =
    {
        VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR, // VkStructureType                                   sType;
        DE_NULL,                                                // const void*                                       pNext;
        m_pipelineCreateFlags,                                  // VkPipelineCreateFlags                             flags;
        de::sizeU32(m_shaderCreateInfos),                       // deUint32                                          stageCount;
        de::dataOrNull(m_shaderCreateInfos),                    // const VkPipelineShaderStageCreateInfo*            pStages;
        de::sizeU32(m_shadersGroupCreateInfos),                 // deUint32                                          groupCount;
        de::dataOrNull(m_shadersGroupCreateInfos),              // const VkRayTracingShaderGroupCreateInfoKHR*       pGroups;
        m_maxRecursionDepth,                                    // deUint32                                          maxPipelineRayRecursionDepth;
        librariesCreateInfoPtr,                                 // const VkPipelineLibraryCreateInfoKHR*             pLibraryInfo;
        pipelineInterfaceCreateInfoPtr,                         // const VkRayTracingPipelineInterfaceCreateInfoKHR* pLibraryInterface;
        &dynamicStateCreateInfo,                                // const VkPipelineDynamicStateCreateInfo*           pDynamicState;
        pipelineLayout,                                         // VkPipelineLayout                                  layout;
        (VkPipeline)DE_NULL,                                    // VkPipeline                                        basePipelineHandle;
        0,                                                      // deInt32                                           basePipelineIndex;
    };
    VkPipeline object               = DE_NULL;
    VkResult   result               = vk.createRayTracingPipelinesKHR(device, deferredOperation.get(), pipelineCache, 1u, &pipelineCreateInfo, DE_NULL, &object);
    const bool allowCompileRequired = ((m_pipelineCreateFlags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) != 0);

    if (m_deferredOperation)
    {
        DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS || (allowCompileRequired && result == VK_PIPELINE_COMPILE_REQUIRED));
        finishDeferredOperation(vk, device, deferredOperation.get(), m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
    }

    if (allowCompileRequired && result == VK_PIPELINE_COMPILE_REQUIRED)
        throw CompileRequiredError("createRayTracingPipelinesKHR returned VK_PIPELINE_COMPILE_REQUIRED");

    Move<VkPipeline> pipeline (check<VkPipeline>(object), Deleter<VkPipeline>(vk, device, DE_NULL));
    return pipeline;
}

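// Illustrative sketch (compile-guarded): creating the final pipeline once all shaders have
// been added; an empty library vector yields a plain, non-library pipeline. `pipelineLayout`
// is a hypothetical caller-side handle.
#if 0
static Move<VkPipeline> exampleCreatePipeline (RayTracingPipeline& rtPipeline, const DeviceInterface& vkd,
                                               VkDevice device, VkPipelineLayout pipelineLayout)
{
    const std::vector<VkPipeline> noLibraries;
    return rtPipeline.createPipeline(vkd, device, pipelineLayout, noLibraries);
}
#endif
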
createPipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const std::vector<de::SharedPtr<Move<VkPipeline>>> & pipelineLibraries)3527 Move<VkPipeline> RayTracingPipeline::createPipeline (const DeviceInterface& vk,
3528 const VkDevice device,
3529 const VkPipelineLayout pipelineLayout,
3530 const std::vector<de::SharedPtr<Move<VkPipeline>>>& pipelineLibraries)
3531 {
3532 std::vector<VkPipeline> rawPipelines;
3533 rawPipelines.reserve(pipelineLibraries.size());
3534 for (const auto& lib : pipelineLibraries)
3535 rawPipelines.push_back(lib.get()->get());
3536
3537 return createPipelineKHR(vk, device, pipelineLayout, rawPipelines);
3538 }
3539
createPipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const std::vector<VkPipeline> & pipelineLibraries,const VkPipelineCache pipelineCache)3540 Move<VkPipeline> RayTracingPipeline::createPipeline (const DeviceInterface& vk,
3541 const VkDevice device,
3542 const VkPipelineLayout pipelineLayout,
3543 const std::vector<VkPipeline>& pipelineLibraries,
3544 const VkPipelineCache pipelineCache)
3545 {
3546 return createPipelineKHR(vk, device, pipelineLayout, pipelineLibraries, pipelineCache);
3547 }
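
// A minimal usage sketch for the builder above (hypothetical module handles; a real
// test creates the shader modules, pipeline layout and command buffer first, and the
// addShader() group indices must match the SBT layout the test later builds):
//
//    RayTracingPipeline rayTracingPipeline;
//    rayTracingPipeline.addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR, raygenModule, 0u);
//    rayTracingPipeline.addShader(VK_SHADER_STAGE_MISS_BIT_KHR, missModule, 1u);
//    const Move<VkPipeline> pipeline = rayTracingPipeline.createPipeline(vk, device, *pipelineLayout, std::vector<VkPipeline>());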

std::vector<de::SharedPtr<Move<VkPipeline>>> RayTracingPipeline::createPipelineWithLibraries (const DeviceInterface& vk,
                                                                                              const VkDevice         device,
                                                                                              const VkPipelineLayout pipelineLayout)
{
    for (size_t groupNdx = 0; groupNdx < m_shadersGroupCreateInfos.size(); ++groupNdx)
        DE_ASSERT(m_shadersGroupCreateInfos[groupNdx].sType == VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR);

    DE_ASSERT(m_shaderCreateInfos.size() > 0);
    DE_ASSERT(m_shadersGroupCreateInfos.size() > 0);

    std::vector<de::SharedPtr<Move<VkPipeline>>> result, allLibraries, firstLibraries;
    for (auto it = begin(m_pipelineLibraries), eit = end(m_pipelineLibraries); it != eit; ++it)
    {
        auto childLibraries = (*it)->get()->createPipelineWithLibraries(vk, device, pipelineLayout);
        DE_ASSERT(childLibraries.size() > 0);
        firstLibraries.push_back(childLibraries[0]);
        std::copy(begin(childLibraries), end(childLibraries), std::back_inserter(allLibraries));
    }
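    // The linked top-level pipeline goes first in the returned vector, followed by
    // every library pipeline that was created along the way.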
    result.push_back(makeVkSharedPtr(createPipeline(vk, device, pipelineLayout, firstLibraries)));
    std::copy(begin(allLibraries), end(allLibraries), std::back_inserter(result));
    return result;
}

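// Builds a shader binding table with one entry per shader group, starting at
// shaderBindingTableOffset bytes into the buffer:
//
//    +--------+---------------+-- ... --+--------+---------------+
//    | handle | record (+pad) |         | handle | record (+pad) |
//    +--------+---------------+-- ... --+--------+---------------+
//
// Handles are queried from the pipeline, and shaderGroupDataPtrPerGroup may
// supply an optional inline shader record to copy in after each handle.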
de::MovePtr<BufferWithMemory> RayTracingPipeline::createShaderBindingTable (const DeviceInterface&     vk,
                                                                            const VkDevice             device,
                                                                            const VkPipeline           pipeline,
                                                                            Allocator&                 allocator,
                                                                            const deUint32&            shaderGroupHandleSize,
                                                                            const deUint32             shaderGroupBaseAlignment,
                                                                            const deUint32&            firstGroup,
                                                                            const deUint32&            groupCount,
                                                                            const VkBufferCreateFlags& additionalBufferCreateFlags,
                                                                            const VkBufferUsageFlags&  additionalBufferUsageFlags,
                                                                            const MemoryRequirement&   additionalMemoryRequirement,
                                                                            const VkDeviceAddress&     opaqueCaptureAddress,
                                                                            const deUint32             shaderBindingTableOffset,
                                                                            const deUint32             shaderRecordSize,
                                                                            const void**               shaderGroupDataPtrPerGroup,
                                                                            const bool                 autoAlignRecords)
{
    DE_ASSERT(shaderGroupBaseAlignment != 0u);
    DE_ASSERT((shaderBindingTableOffset % shaderGroupBaseAlignment) == 0);
    DE_UNREF(shaderGroupBaseAlignment);

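    // One SBT entry is the group handle plus an optional inline shader record; with
    // autoAlignRecords the entry size is rounded up to a multiple of the handle size.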
    const auto               totalEntrySize = (autoAlignRecords ? (deAlign32(shaderGroupHandleSize + shaderRecordSize, shaderGroupHandleSize)) : (shaderGroupHandleSize + shaderRecordSize));
    const deUint32           sbtSize        = shaderBindingTableOffset + groupCount * totalEntrySize;
    const VkBufferUsageFlags sbtFlags       = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | additionalBufferUsageFlags;
    VkBufferCreateInfo       sbtCreateInfo  = makeBufferCreateInfo(sbtSize, sbtFlags);
    sbtCreateInfo.flags |= additionalBufferCreateFlags;
    VkBufferOpaqueCaptureAddressCreateInfo sbtCaptureAddressInfo =
    {
        VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO,  // VkStructureType sType;
        DE_NULL,                                                      // const void* pNext;
        deUint64(opaqueCaptureAddress)                                // deUint64 opaqueCaptureAddress;
    };

    if (opaqueCaptureAddress != 0u)
    {
        sbtCreateInfo.pNext = &sbtCaptureAddressInfo;
        sbtCreateInfo.flags |= VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
    }
    const MemoryRequirement       sbtMemRequirements = MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress | additionalMemoryRequirement;
    de::MovePtr<BufferWithMemory> sbtBuffer          = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, sbtCreateInfo, sbtMemRequirements));
    vk::Allocation&               sbtAlloc           = sbtBuffer->getAllocation();

    // Collect the shader group handles for the requested group range.
    std::vector<deUint8> shaderHandles (groupCount * shaderGroupHandleSize);
    VK_CHECK(getRayTracingShaderGroupHandles(vk, device, pipeline, firstGroup, groupCount, groupCount * shaderGroupHandleSize, shaderHandles.data()));

    // Reserve room for a ShaderRecordKHR after each shader handle (the record size
    // may be 0) and take the per-entry alignment into consideration.
    deUint8* shaderBegin = (deUint8*)sbtAlloc.getHostPtr() + shaderBindingTableOffset;
    for (deUint32 idx = 0; idx < groupCount; ++idx)
    {
        deUint8* shaderSrcPos = shaderHandles.data() + idx * shaderGroupHandleSize;
        deUint8* shaderDstPos = shaderBegin + idx * totalEntrySize;
        deMemcpy(shaderDstPos, shaderSrcPos, shaderGroupHandleSize);

        if (shaderGroupDataPtrPerGroup != nullptr &&
            shaderGroupDataPtrPerGroup[idx] != nullptr)
        {
            DE_ASSERT(sbtSize >= static_cast<deUint32>(shaderDstPos - shaderBegin) + shaderGroupHandleSize);

            deMemcpy(shaderDstPos + shaderGroupHandleSize,
                     shaderGroupDataPtrPerGroup[idx],
                     shaderRecordSize);
        }
    }

    flushMappedMemoryRange(vk, device, sbtAlloc.getMemory(), sbtAlloc.getOffset(), VK_WHOLE_SIZE);

    return sbtBuffer;
}
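
// Sketch of turning the returned buffer into the strided regions consumed by
// cmdTraceRays(), using the getBufferDeviceAddress() and
// makeStridedDeviceAddressRegionKHR() helpers from this framework (the
// stride/size shown assume a single group and no shader record data):
//
//    const VkDeviceAddress sbtAddress = getBufferDeviceAddress(vk, device, raygenSbt->get(), 0);
//    const VkStridedDeviceAddressRegionKHR raygenRegion =
//        makeStridedDeviceAddressRegionKHR(sbtAddress, shaderGroupHandleSize, shaderGroupHandleSize);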

void RayTracingPipeline::setCreateFlags (const VkPipelineCreateFlags& pipelineCreateFlags)
{
    m_pipelineCreateFlags = pipelineCreateFlags;
}

void RayTracingPipeline::setMaxRecursionDepth (const deUint32& maxRecursionDepth)
{
    m_maxRecursionDepth = maxRecursionDepth;
}

void RayTracingPipeline::setMaxPayloadSize (const deUint32& maxPayloadSize)
{
    m_maxPayloadSize = maxPayloadSize;
}

void RayTracingPipeline::setMaxAttributeSize (const deUint32& maxAttributeSize)
{
    m_maxAttributeSize = maxAttributeSize;
}

void RayTracingPipeline::setDeferredOperation (const bool     deferredOperation,
                                               const deUint32 workerThreadCount)
{
    m_deferredOperation = deferredOperation;
    m_workerThreadCount = workerThreadCount;
}

void RayTracingPipeline::addDynamicState (const VkDynamicState& dynamicState)
{
    m_dynamicStates.push_back(dynamicState);
}

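// Thin accessor around the VK_KHR_acceleration_structure and
// VK_KHR_ray_tracing_pipeline property structures (plus the core
// maxMemoryAllocationCount limit), queried once at construction time.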
class RayTracingPropertiesKHR : public RayTracingProperties
{
public:
    RayTracingPropertiesKHR () = delete;
    RayTracingPropertiesKHR (const InstanceInterface& vki,
                             const VkPhysicalDevice   physicalDevice);
    virtual ~RayTracingPropertiesKHR ();

    uint32_t getShaderGroupHandleSize                  (void) override { return m_rayTracingPipelineProperties.shaderGroupHandleSize; }
    uint32_t getShaderGroupHandleAlignment             (void) override { return m_rayTracingPipelineProperties.shaderGroupHandleAlignment; }
    uint32_t getMaxRecursionDepth                      (void) override { return m_rayTracingPipelineProperties.maxRayRecursionDepth; }
    uint32_t getMaxShaderGroupStride                   (void) override { return m_rayTracingPipelineProperties.maxShaderGroupStride; }
    uint32_t getShaderGroupBaseAlignment               (void) override { return m_rayTracingPipelineProperties.shaderGroupBaseAlignment; }
    uint64_t getMaxGeometryCount                       (void) override { return m_accelerationStructureProperties.maxGeometryCount; }
    uint64_t getMaxInstanceCount                       (void) override { return m_accelerationStructureProperties.maxInstanceCount; }
    uint64_t getMaxPrimitiveCount                      (void) override { return m_accelerationStructureProperties.maxPrimitiveCount; }
    uint32_t getMaxDescriptorSetAccelerationStructures (void) override { return m_accelerationStructureProperties.maxDescriptorSetAccelerationStructures; }
    uint32_t getMaxRayDispatchInvocationCount          (void) override { return m_rayTracingPipelineProperties.maxRayDispatchInvocationCount; }
    uint32_t getMaxRayHitAttributeSize                 (void) override { return m_rayTracingPipelineProperties.maxRayHitAttributeSize; }
    uint32_t getMaxMemoryAllocationCount               (void) override { return m_maxMemoryAllocationCount; }

protected:
    VkPhysicalDeviceAccelerationStructurePropertiesKHR m_accelerationStructureProperties;
    VkPhysicalDeviceRayTracingPipelinePropertiesKHR    m_rayTracingPipelineProperties;
    deUint32                                           m_maxMemoryAllocationCount;
};

RayTracingPropertiesKHR::~RayTracingPropertiesKHR ()
{
}

RayTracingPropertiesKHR::RayTracingPropertiesKHR (const InstanceInterface& vki,
                                                  const VkPhysicalDevice   physicalDevice)
    : RayTracingProperties (vki, physicalDevice)
{
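    // getPhysicalDeviceExtensionProperties() returns a proxy that deduces the
    // property structure from the assignment target and chains it into a
    // vkGetPhysicalDeviceProperties2 query (the usual CTS idiom).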
    m_accelerationStructureProperties = getPhysicalDeviceExtensionProperties(vki, physicalDevice);
    m_rayTracingPipelineProperties    = getPhysicalDeviceExtensionProperties(vki, physicalDevice);
    m_maxMemoryAllocationCount        = getPhysicalDeviceProperties(vki, physicalDevice).limits.maxMemoryAllocationCount;
}

de::MovePtr<RayTracingProperties> makeRayTracingProperties (const InstanceInterface& vki,
                                                            const VkPhysicalDevice   physicalDevice)
{
    return de::MovePtr<RayTracingProperties>(new RayTracingPropertiesKHR(vki, physicalDevice));
}

static inline void cmdTraceRaysKHR (const DeviceInterface&                 vk,
                                    VkCommandBuffer                        commandBuffer,
                                    const VkStridedDeviceAddressRegionKHR* raygenShaderBindingTableRegion,
                                    const VkStridedDeviceAddressRegionKHR* missShaderBindingTableRegion,
                                    const VkStridedDeviceAddressRegionKHR* hitShaderBindingTableRegion,
                                    const VkStridedDeviceAddressRegionKHR* callableShaderBindingTableRegion,
                                    deUint32                               width,
                                    deUint32                               height,
                                    deUint32                               depth)
{
    return vk.cmdTraceRaysKHR(commandBuffer,
                              raygenShaderBindingTableRegion,
                              missShaderBindingTableRegion,
                              hitShaderBindingTableRegion,
                              callableShaderBindingTableRegion,
                              width,
                              height,
                              depth);
}

void cmdTraceRays (const DeviceInterface&                 vk,
                   VkCommandBuffer                        commandBuffer,
                   const VkStridedDeviceAddressRegionKHR* raygenShaderBindingTableRegion,
                   const VkStridedDeviceAddressRegionKHR* missShaderBindingTableRegion,
                   const VkStridedDeviceAddressRegionKHR* hitShaderBindingTableRegion,
                   const VkStridedDeviceAddressRegionKHR* callableShaderBindingTableRegion,
                   deUint32                               width,
                   deUint32                               height,
                   deUint32                               depth)
{
    DE_ASSERT(raygenShaderBindingTableRegion   != DE_NULL);
    DE_ASSERT(missShaderBindingTableRegion     != DE_NULL);
    DE_ASSERT(hitShaderBindingTableRegion      != DE_NULL);
    DE_ASSERT(callableShaderBindingTableRegion != DE_NULL);

    return cmdTraceRaysKHR(vk,
                           commandBuffer,
                           raygenShaderBindingTableRegion,
                           missShaderBindingTableRegion,
                           hitShaderBindingTableRegion,
                           callableShaderBindingTableRegion,
                           width,
                           height,
                           depth);
}
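
// Typical call site (regions built as in the shader binding table sketch above;
// a zero-filled region is legal for table types the pipeline does not use):
//
//    const VkStridedDeviceAddressRegionKHR emptyRegion = makeStridedDeviceAddressRegionKHR(0, 0, 0);
//    cmdTraceRays(vk, *cmdBuffer, &raygenRegion, &missRegion, &hitRegion, &emptyRegion, width, height, 1u);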

static inline void cmdTraceRaysIndirectKHR (const DeviceInterface&                 vk,
                                            VkCommandBuffer                        commandBuffer,
                                            const VkStridedDeviceAddressRegionKHR* raygenShaderBindingTableRegion,
                                            const VkStridedDeviceAddressRegionKHR* missShaderBindingTableRegion,
                                            const VkStridedDeviceAddressRegionKHR* hitShaderBindingTableRegion,
                                            const VkStridedDeviceAddressRegionKHR* callableShaderBindingTableRegion,
                                            VkDeviceAddress                        indirectDeviceAddress)
{
    DE_ASSERT(raygenShaderBindingTableRegion   != DE_NULL);
    DE_ASSERT(missShaderBindingTableRegion     != DE_NULL);
    DE_ASSERT(hitShaderBindingTableRegion      != DE_NULL);
    DE_ASSERT(callableShaderBindingTableRegion != DE_NULL);
    DE_ASSERT(indirectDeviceAddress            != 0);

    return vk.cmdTraceRaysIndirectKHR(commandBuffer,
                                      raygenShaderBindingTableRegion,
                                      missShaderBindingTableRegion,
                                      hitShaderBindingTableRegion,
                                      callableShaderBindingTableRegion,
                                      indirectDeviceAddress);
}

void cmdTraceRaysIndirect (const DeviceInterface&                 vk,
                           VkCommandBuffer                        commandBuffer,
                           const VkStridedDeviceAddressRegionKHR* raygenShaderBindingTableRegion,
                           const VkStridedDeviceAddressRegionKHR* missShaderBindingTableRegion,
                           const VkStridedDeviceAddressRegionKHR* hitShaderBindingTableRegion,
                           const VkStridedDeviceAddressRegionKHR* callableShaderBindingTableRegion,
                           VkDeviceAddress                        indirectDeviceAddress)
{
    return cmdTraceRaysIndirectKHR(vk,
                                   commandBuffer,
                                   raygenShaderBindingTableRegion,
                                   missShaderBindingTableRegion,
                                   hitShaderBindingTableRegion,
                                   callableShaderBindingTableRegion,
                                   indirectDeviceAddress);
}

static inline void cmdTraceRaysIndirect2KHR (const DeviceInterface& vk,
                                             VkCommandBuffer        commandBuffer,
                                             VkDeviceAddress        indirectDeviceAddress)
{
    DE_ASSERT(indirectDeviceAddress != 0);

    return vk.cmdTraceRaysIndirect2KHR(commandBuffer, indirectDeviceAddress);
}
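
// The indirect2 variant reads an entire VkTraceRaysIndirect2CommandKHR (all four
// SBT regions plus the dispatch dimensions) from device memory; it requires the
// rayTracingPipelineTraceRaysIndirect2 feature of VK_KHR_ray_tracing_maintenance1.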

void cmdTraceRaysIndirect2 (const DeviceInterface& vk,
                            VkCommandBuffer        commandBuffer,
                            VkDeviceAddress        indirectDeviceAddress)
{
    return cmdTraceRaysIndirect2KHR(vk, commandBuffer, indirectDeviceAddress);
}

#else

deUint32 rayTracingDefineAnything ()
{
    return 0;
}

#endif // CTS_USES_VULKANSC

} // vk