/*-------------------------------------------------------------------------
 * Vulkan CTS Framework
 * --------------------
 *
 * Copyright (c) 2020 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Utilities for creating commonly used Vulkan objects
 *//*--------------------------------------------------------------------*/

#include "vkRayTracingUtil.hpp"

#include "vkRefUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkCmdUtil.hpp"

#include "deStringUtil.hpp"
#include "deSTLUtil.hpp"

#include <vector>
#include <string>
#include <thread>
#include <limits>
#include <type_traits>
#include <map>

namespace vk
{

#ifndef CTS_USES_VULKANSC

static const deUint32 WATCHDOG_INTERVAL = 16384; // Touch watchDog every N iterations.

struct DeferredThreadParams
{
	const DeviceInterface&	vk;
	VkDevice				device;
	VkDeferredOperationKHR	deferredOperation;
	VkResult				result;
};
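// Returns the lowercase format name without the "VK_FORMAT_" prefix, e.g. "r32g32b32_sfloat".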
std::string getFormatSimpleName (vk::VkFormat format)
{
	constexpr size_t kPrefixLen = 10; // strlen("VK_FORMAT_")
	return de::toLower(de::toString(format).substr(kPrefixLen));
}

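// 2D point-in-triangle test for p against (p0, p1, p2); z components are ignored. The signs of the
// edge functions s and t are compared against the triangle's signed double area a.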
bool pointInTriangle2D (const tcu::Vec3& p, const tcu::Vec3& p0, const tcu::Vec3& p1, const tcu::Vec3& p2)
{
	float s = p0.y() * p2.x() - p0.x() * p2.y() + (p2.y() - p0.y()) * p.x() + (p0.x() - p2.x()) * p.y();
	float t = p0.x() * p1.y() - p0.y() * p1.x() + (p0.y() - p1.y()) * p.x() + (p1.x() - p0.x()) * p.y();

	if ((s < 0) != (t < 0))
		return false;

	float a = -p1.y() * p2.x() + p0.y() * (p2.x() - p1.x()) + p0.x() * (p1.y() - p2.y()) + p1.x() * p2.y();

	return a < 0 ?
			(s <= 0 && s + t >= a) :
			(s >= 0 && s + t <= a);
}

// Returns true if VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR needs to be supported for the given format.
static bool isMandatoryAccelerationStructureVertexBufferFormat (vk::VkFormat format)
{
	bool mandatory = false;

	switch (format)
	{
	case VK_FORMAT_R32G32_SFLOAT:
	case VK_FORMAT_R32G32B32_SFLOAT:
	case VK_FORMAT_R16G16_SFLOAT:
	case VK_FORMAT_R16G16B16A16_SFLOAT:
	case VK_FORMAT_R16G16_SNORM:
	case VK_FORMAT_R16G16B16A16_SNORM:
		mandatory = true;
		break;
	default:
		break;
	}

	return mandatory;
}

void checkAccelerationStructureVertexBufferFormat (const vk::InstanceInterface& vki, vk::VkPhysicalDevice physicalDevice, vk::VkFormat format)
{
	const vk::VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(vki, physicalDevice, format);

	if ((formatProperties.bufferFeatures & vk::VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR) == 0u)
	{
		const std::string errorMsg = "Format not supported for acceleration structure vertex buffers";
		if (isMandatoryAccelerationStructureVertexBufferFormat(format))
			TCU_FAIL(errorMsg);
		TCU_THROW(NotSupportedError, errorMsg);
	}
}

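// Returns GLSL source for a shared ray generation shader: one ray per launch invocation, cast in the
// -Z direction from the center of the invocation's cell in a unit grid covering the launch size.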
std::string getCommonRayGenerationShader (void)
{
	return
		"#version 460 core\n"
		"#extension GL_EXT_ray_tracing : require\n"
		"layout(location = 0) rayPayloadEXT vec3 hitValue;\n"
		"layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n"
		"\n"
		"void main()\n"
		"{\n"
		"  uint  rayFlags = 0;\n"
		"  uint  cullMask = 0xFF;\n"
		"  float tmin     = 0.0;\n"
		"  float tmax     = 9.0;\n"
		"  vec3  origin   = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
		"  vec3  direct   = vec3(0.0, 0.0, -1.0);\n"
		"  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, 0);\n"
		"}\n";
}

RaytracedGeometryBase::RaytracedGeometryBase (VkGeometryTypeKHR geometryType, VkFormat vertexFormat, VkIndexType indexType)
	: m_geometryType		(geometryType)
	, m_vertexFormat		(vertexFormat)
	, m_indexType			(indexType)
	, m_geometryFlags		((VkGeometryFlagsKHR)0u)
	, m_hasOpacityMicromap	(false)
{
	if (m_geometryType == VK_GEOMETRY_TYPE_AABBS_KHR)
		DE_ASSERT(m_vertexFormat == VK_FORMAT_R32G32B32_SFLOAT);
}

RaytracedGeometryBase::~RaytracedGeometryBase ()
{
}

struct GeometryBuilderParams
{
	VkGeometryTypeKHR	geometryType;
	bool				usePadding;
};

template <typename V, typename I>
RaytracedGeometryBase* buildRaytracedGeometry (const GeometryBuilderParams& params)
{
	return new RaytracedGeometry<V, I>(params.geometryType, (params.usePadding ? 1u : 0u));
}

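// Factory mapping a (vertex format, index type) pair to the matching RaytracedGeometry instantiation.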
de::SharedPtr<RaytracedGeometryBase> makeRaytracedGeometry (VkGeometryTypeKHR geometryType, VkFormat vertexFormat, VkIndexType indexType, bool padVertices)
{
	const GeometryBuilderParams builderParams { geometryType, padVertices };

	switch (vertexFormat)
	{
	case VK_FORMAT_R32G32_SFLOAT:
		switch (indexType)
		{
		case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, deUint16>(builderParams));
		case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, deUint32>(builderParams));
		case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, EmptyIndex>(builderParams));
		default:						TCU_THROW(InternalError, "Wrong index type");
		}
	case VK_FORMAT_R32G32B32_SFLOAT:
		switch (indexType)
		{
		case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, deUint16>(builderParams));
		case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, deUint32>(builderParams));
		case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, EmptyIndex>(builderParams));
		default:						TCU_THROW(InternalError, "Wrong index type");
		}
	case VK_FORMAT_R32G32B32A32_SFLOAT:
		switch (indexType)
		{
		case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, deUint16>(builderParams));
		case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, deUint32>(builderParams));
		case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, EmptyIndex>(builderParams));
		default:						TCU_THROW(InternalError, "Wrong index type");
		}
	case VK_FORMAT_R16G16_SFLOAT:
		switch (indexType)
		{
		case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, deUint16>(builderParams));
		case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, deUint32>(builderParams));
		case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, EmptyIndex>(builderParams));
		default:						TCU_THROW(InternalError, "Wrong index type");
		}
	case VK_FORMAT_R16G16B16_SFLOAT:
		switch (indexType)
		{
		case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, deUint16>(builderParams));
		case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, deUint32>(builderParams));
		case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, EmptyIndex>(builderParams));
		default:						TCU_THROW(InternalError, "Wrong index type");
		}
	case VK_FORMAT_R16G16B16A16_SFLOAT:
		switch (indexType)
		{
		case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, deUint16>(builderParams));
		case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, deUint32>(builderParams));
		case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, EmptyIndex>(builderParams));
		default:						TCU_THROW(InternalError, "Wrong index type");
		}
	case VK_FORMAT_R16G16_SNORM:
		switch (indexType)
		{
		case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, deUint16>(builderParams));
		case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, deUint32>(builderParams));
		case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, EmptyIndex>(builderParams));
		default:						TCU_THROW(InternalError, "Wrong index type");
		}
	case VK_FORMAT_R16G16B16_SNORM:
		switch (indexType)
		{
		case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, deUint16>(builderParams));
		case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, deUint32>(builderParams));
		case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, EmptyIndex>(builderParams));
		default:						TCU_THROW(InternalError, "Wrong index type");
		}
	case VK_FORMAT_R16G16B16A16_SNORM:
		switch (indexType)
		{
		case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, deUint16>(builderParams));
		case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, deUint32>(builderParams));
		case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, EmptyIndex>(builderParams));
		default:						TCU_THROW(InternalError, "Wrong index type");
		}
	case VK_FORMAT_R64G64_SFLOAT:
		switch (indexType)
		{
		case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, deUint16>(builderParams));
		case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, deUint32>(builderParams));
		case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, EmptyIndex>(builderParams));
		default:						TCU_THROW(InternalError, "Wrong index type");
		}
	case VK_FORMAT_R64G64B64_SFLOAT:
		switch (indexType)
		{
		case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, deUint16>(builderParams));
		case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, deUint32>(builderParams));
		case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, EmptyIndex>(builderParams));
		default:						TCU_THROW(InternalError, "Wrong index type");
		}
	case VK_FORMAT_R64G64B64A64_SFLOAT:
		switch (indexType)
		{
		case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, deUint16>(builderParams));
		case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, deUint32>(builderParams));
		case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, EmptyIndex>(builderParams));
		default:						TCU_THROW(InternalError, "Wrong index type");
		}
	case VK_FORMAT_R8G8_SNORM:
		switch (indexType)
		{
		case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, deUint16>(builderParams));
		case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, deUint32>(builderParams));
		case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, EmptyIndex>(builderParams));
		default:						TCU_THROW(InternalError, "Wrong index type");
		}
	case VK_FORMAT_R8G8B8_SNORM:
		switch (indexType)
		{
		case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, deUint16>(builderParams));
		case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, deUint32>(builderParams));
		case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, EmptyIndex>(builderParams));
		default:						TCU_THROW(InternalError, "Wrong index type");
		}
	case VK_FORMAT_R8G8B8A8_SNORM:
		switch (indexType)
		{
		case VK_INDEX_TYPE_UINT16:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, deUint16>(builderParams));
		case VK_INDEX_TYPE_UINT32:		return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, deUint32>(builderParams));
		case VK_INDEX_TYPE_NONE_KHR:	return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, EmptyIndex>(builderParams));
		default:						TCU_THROW(InternalError, "Wrong index type");
		}
	default:
		TCU_THROW(InternalError, "Wrong vertex format");
	}
}

VkDeviceAddress getBufferDeviceAddress (const DeviceInterface&	vk,
										const VkDevice			device,
										const VkBuffer			buffer,
										VkDeviceSize			offset)
{
	if (buffer == DE_NULL)
		return 0;

	VkBufferDeviceAddressInfo deviceAddressInfo
	{
		VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,	// VkStructureType	sType;
		DE_NULL,										// const void*		pNext;
		buffer											// VkBuffer			buffer;
	};
	return vk.getBufferDeviceAddress(device, &deviceAddressInfo) + offset;
}

static inline Move<VkQueryPool> makeQueryPool (const DeviceInterface&	vk,
											   const VkDevice			device,
											   const VkQueryType		queryType,
											   deUint32					queryCount)
{
	const VkQueryPoolCreateInfo queryPoolCreateInfo =
	{
		VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,	// sType
		DE_NULL,									// pNext
		(VkQueryPoolCreateFlags)0,					// flags
		queryType,									// queryType
		queryCount,									// queryCount
		0u,											// pipelineStatistics
	};
	return createQueryPool(vk, device, &queryPoolCreateInfo);
}

static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureGeometryDataKHR (const VkAccelerationStructureGeometryTrianglesDataKHR& triangles)
{
	VkAccelerationStructureGeometryDataKHR result;

	deMemset(&result, 0, sizeof(result));

	result.triangles = triangles;

	return result;
}

static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureGeometryDataKHR (const VkAccelerationStructureGeometryAabbsDataKHR& aabbs)
{
	VkAccelerationStructureGeometryDataKHR result;

	deMemset(&result, 0, sizeof(result));

	result.aabbs = aabbs;

	return result;
}

static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureInstancesDataKHR (const VkAccelerationStructureGeometryInstancesDataKHR& instances)
{
	VkAccelerationStructureGeometryDataKHR result;

	deMemset(&result, 0, sizeof(result));

	result.instances = instances;

	return result;
}

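// Packs instance parameters into VkAccelerationStructureInstanceKHR, masking instanceCustomIndex and
// instanceShaderBindingTableRecordOffset to 24 bits and mask/flags to 8 bits to match the bitfields.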
static inline VkAccelerationStructureInstanceKHR makeVkAccelerationStructureInstanceKHR (const VkTransformMatrixKHR&	transform,
																						  deUint32						instanceCustomIndex,
																						  deUint32						mask,
																						  deUint32						instanceShaderBindingTableRecordOffset,
																						  VkGeometryInstanceFlagsKHR	flags,
																						  deUint64						accelerationStructureReference)
{
	VkAccelerationStructureInstanceKHR instance		= { transform, 0, 0, 0, 0, accelerationStructureReference };
	instance.instanceCustomIndex					= instanceCustomIndex & 0xFFFFFF;
	instance.mask									= mask & 0xFF;
	instance.instanceShaderBindingTableRecordOffset	= instanceShaderBindingTableRecordOffset & 0xFFFFFF;
	instance.flags									= flags & 0xFF;
	return instance;
}

VkResult getRayTracingShaderGroupHandlesKHR (const DeviceInterface&	vk,
											 const VkDevice			device,
											 const VkPipeline		pipeline,
											 const deUint32			firstGroup,
											 const deUint32			groupCount,
											 const deUintptr		dataSize,
											 void*					pData)
{
	return vk.getRayTracingShaderGroupHandlesKHR(device, pipeline, firstGroup, groupCount, dataSize, pData);
}

VkResult getRayTracingShaderGroupHandles (const DeviceInterface&	vk,
										  const VkDevice			device,
										  const VkPipeline			pipeline,
										  const deUint32			firstGroup,
										  const deUint32			groupCount,
										  const deUintptr			dataSize,
										  void*						pData)
{
	return getRayTracingShaderGroupHandlesKHR(vk, device, pipeline, firstGroup, groupCount, dataSize, pData);
}

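// Joins the calling thread to the deferred operation, yielding while it is idle, and returns the
// operation's result once it has completed (possibly wrapped up by another thread).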
VkResult finishDeferredOperation (const DeviceInterface&	vk,
								  VkDevice					device,
								  VkDeferredOperationKHR	deferredOperation)
{
	VkResult result = vk.deferredOperationJoinKHR(device, deferredOperation);

	while (result == VK_THREAD_IDLE_KHR)
	{
		std::this_thread::yield();
		result = vk.deferredOperationJoinKHR(device, deferredOperation);
	}

	switch (result)
	{
		case VK_SUCCESS:
		{
			// Deferred operation has finished. Query its result.
			result = vk.getDeferredOperationResultKHR(device, deferredOperation);

			break;
		}

		case VK_THREAD_DONE_KHR:
		{
			// Deferred operation is being wrapped up by another thread;
			// wait for that thread to finish.
			do
			{
				std::this_thread::yield();
				result = vk.getDeferredOperationResultKHR(device, deferredOperation);
			} while (result == VK_NOT_READY);

			break;
		}

		default:
		{
			DE_ASSERT(false);

			break;
		}
	}

	return result;
}

void finishDeferredOperationThreaded (DeferredThreadParams* deferredThreadParams)
{
	deferredThreadParams->result = finishDeferredOperation(deferredThreadParams->vk, deferredThreadParams->device, deferredThreadParams->deferredOperation);
}

void finishDeferredOperation (const DeviceInterface&	vk,
							  VkDevice					device,
							  VkDeferredOperationKHR	deferredOperation,
							  const deUint32			workerThreadCount,
							  const bool				operationNotDeferred)
{
	if (operationNotDeferred)
	{
		// When the operation deferral returns VK_OPERATION_NOT_DEFERRED_KHR,
		// the deferred operation should act as if no command was deferred.
		VK_CHECK(vk.getDeferredOperationResultKHR(device, deferredOperation));

		// There is no need to join any threads to the deferred operation,
		// so the code below can be skipped.
		return;
	}

	if (workerThreadCount == 0)
	{
		VK_CHECK(finishDeferredOperation(vk, device, deferredOperation));
	}
	else
	{
		const deUint32 maxThreadCountSupported	= deMinu32(256u, vk.getDeferredOperationMaxConcurrencyKHR(device, deferredOperation));
		const deUint32 requestedThreadCount		= workerThreadCount;
		const deUint32 testThreadCount			= requestedThreadCount == std::numeric_limits<deUint32>::max() ? maxThreadCountSupported : requestedThreadCount;

		if (maxThreadCountSupported == 0)
			TCU_FAIL("vkGetDeferredOperationMaxConcurrencyKHR must not return 0");

		const DeferredThreadParams deferredThreadParams =
		{
			vk,					// const DeviceInterface&	vk;
			device,				// VkDevice					device;
			deferredOperation,	// VkDeferredOperationKHR	deferredOperation;
			VK_RESULT_MAX_ENUM,	// VkResult					result;
		};
		std::vector<DeferredThreadParams>		threadParams	(testThreadCount, deferredThreadParams);
		std::vector<de::MovePtr<std::thread> >	threads			(testThreadCount);
		bool									executionResult	= false;

		DE_ASSERT(threads.size() > 0 && threads.size() == testThreadCount);

		for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
			threads[threadNdx] = de::MovePtr<std::thread>(new std::thread(finishDeferredOperationThreaded, &threadParams[threadNdx]));

		for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
			threads[threadNdx]->join();

		for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
			if (threadParams[threadNdx].result == VK_SUCCESS)
				executionResult = true;

		if (!executionResult)
			TCU_FAIL("No worker thread reported VK_SUCCESS");
	}
}

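// Host-visible storage used when serializing and deserializing acceleration structures.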
SerialStorage::SerialStorage (const DeviceInterface&						vk,
							  const VkDevice								device,
							  Allocator&									allocator,
							  const VkAccelerationStructureBuildTypeKHR	buildType,
							  const VkDeviceSize							storageSize)
	: m_buildType	(buildType)
	, m_storageSize	(storageSize)
	, m_serialInfo	()
{
	const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(storageSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
	try
	{
		m_buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
	}
	catch (const tcu::NotSupportedError&)
	{
		// Retry without the Cached flag.
		m_buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
	}
}

SerialStorage::SerialStorage (const DeviceInterface&						vk,
							  const VkDevice								device,
							  Allocator&									allocator,
							  const VkAccelerationStructureBuildTypeKHR	buildType,
							  const SerialInfo&							serialInfo)
	: m_buildType	(buildType)
	, m_storageSize	(serialInfo.sizes()[0]) // raises an assertion if serialInfo is empty
	, m_serialInfo	(serialInfo)
{
	DE_ASSERT(serialInfo.sizes().size() >= 2u);

	// Create buffer for the top-level acceleration structure.
	{
		const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(m_storageSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
		m_buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
	}

	// Create buffers for the bottom-level acceleration structures.
	{
		std::vector<deUint64> addrs;

		for (std::size_t i = 1; i < serialInfo.addresses().size(); ++i)
		{
			const deUint64&	lookAddr	= serialInfo.addresses()[i];
			auto			end			= addrs.end();
			auto			match		= std::find_if(addrs.begin(), end, [&](const deUint64& item){ return item == lookAddr; });
			if (match == end)
			{
				addrs.emplace_back(lookAddr);
				m_bottoms.emplace_back(de::SharedPtr<SerialStorage>(new SerialStorage(vk, device, allocator, buildType, serialInfo.sizes()[i])));
			}
		}
	}
}

VkDeviceOrHostAddressKHR SerialStorage::getAddress (const DeviceInterface&						vk,
													const VkDevice								device,
													const VkAccelerationStructureBuildTypeKHR	buildType)
{
	if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
		return makeDeviceOrHostAddressKHR(vk, device, m_buffer->get(), 0);
	else
		return makeDeviceOrHostAddressKHR(m_buffer->getAllocation().getHostPtr());
}

SerialStorage::AccelerationStructureHeader* SerialStorage::getASHeader ()
{
	return reinterpret_cast<AccelerationStructureHeader*>(getHostAddress().hostAddress);
}

bool SerialStorage::hasDeepFormat () const
{
	return (m_serialInfo.sizes().size() >= 2u);
}

de::SharedPtr<SerialStorage> SerialStorage::getBottomStorage (deUint32 index) const
{
	return m_bottoms[index];
}

VkDeviceOrHostAddressKHR SerialStorage::getHostAddress (VkDeviceSize offset)
{
	DE_ASSERT(offset < m_storageSize);
	return makeDeviceOrHostAddressKHR(static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr()) + offset);
}

VkDeviceOrHostAddressConstKHR SerialStorage::getHostAddressConst (VkDeviceSize offset)
{
	return makeDeviceOrHostAddressConstKHR(static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr()) + offset);
}

VkDeviceOrHostAddressConstKHR SerialStorage::getAddressConst (const DeviceInterface&						vk,
															  const VkDevice								device,
															  const VkAccelerationStructureBuildTypeKHR	buildType)
{
	if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
		return makeDeviceOrHostAddressConstKHR(vk, device, m_buffer->get(), 0);
	else
		return getHostAddressConst();
}

inline VkDeviceSize SerialStorage::getStorageSize () const
{
	return m_storageSize;
}

inline const SerialInfo& SerialStorage::getSerialInfo () const
{
	return m_serialInfo;
}

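// Reads the deserialized-size field from the header of the serialized acceleration structure.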
deUint64 SerialStorage::getDeserializedSize ()
{
	deUint64		result		= 0;
	const deUint8*	startPtr	= static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr());

	DE_ASSERT(sizeof(result) == DESERIALIZED_SIZE_SIZE);

	deMemcpy(&result, startPtr + DESERIALIZED_SIZE_OFFSET, sizeof(result));

	return result;
}

BottomLevelAccelerationStructure::~BottomLevelAccelerationStructure ()
{
}

BottomLevelAccelerationStructure::BottomLevelAccelerationStructure ()
	: m_structureSize		(0u)
	, m_updateScratchSize	(0u)
	, m_buildScratchSize	(0u)
{
}

void BottomLevelAccelerationStructure::setGeometryData (const std::vector<tcu::Vec3>&	geometryData,
														const bool						triangles,
														const VkGeometryFlagsKHR		geometryFlags)
{
	if (triangles)
		DE_ASSERT((geometryData.size() % 3) == 0);
	else
		DE_ASSERT((geometryData.size() % 2) == 0);

	setGeometryCount(1u);

	addGeometry(geometryData, triangles, geometryFlags);
}

void BottomLevelAccelerationStructure::setDefaultGeometryData (const VkShaderStageFlagBits	testStage,
															   const VkGeometryFlagsKHR		geometryFlags)
{
	bool					trianglesData	= false;
	float					z				= 0.0f;
	std::vector<tcu::Vec3>	geometryData;

	switch (testStage)
	{
		case VK_SHADER_STAGE_RAYGEN_BIT_KHR:		z = -1.0f;	trianglesData = true;	break;
		case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:		z = -1.0f;	trianglesData = true;	break;
		case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:	z = -1.0f;	trianglesData = true;	break;
		case VK_SHADER_STAGE_MISS_BIT_KHR:			z = -9.9f;	trianglesData = true;	break;
		case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:	z = -1.0f;	trianglesData = false;	break;
		case VK_SHADER_STAGE_CALLABLE_BIT_KHR:		z = -1.0f;	trianglesData = true;	break;
		default:									TCU_THROW(InternalError, "Unacceptable stage");
	}

	if (trianglesData)
	{
		geometryData.reserve(6);

		geometryData.push_back(tcu::Vec3(-1.0f, -1.0f, z));
		geometryData.push_back(tcu::Vec3(-1.0f, +1.0f, z));
		geometryData.push_back(tcu::Vec3(+1.0f, -1.0f, z));
		geometryData.push_back(tcu::Vec3(+1.0f, -1.0f, z));
		geometryData.push_back(tcu::Vec3(-1.0f, +1.0f, z));
		geometryData.push_back(tcu::Vec3(+1.0f, +1.0f, z));
	}
	else
	{
		geometryData.reserve(2);

		geometryData.push_back(tcu::Vec3(-1.0f, -1.0f, z));
		geometryData.push_back(tcu::Vec3(+1.0f, +1.0f, z));
	}

	setGeometryCount(1u);

	addGeometry(geometryData, trianglesData, geometryFlags);
}

void BottomLevelAccelerationStructure::setGeometryCount (const size_t geometryCount)
{
	m_geometriesData.clear();

	m_geometriesData.reserve(geometryCount);
}

void BottomLevelAccelerationStructure::addGeometry (de::SharedPtr<RaytracedGeometryBase>& raytracedGeometry)
{
	m_geometriesData.push_back(raytracedGeometry);
}

void BottomLevelAccelerationStructure::addGeometry (const std::vector<tcu::Vec3>&								geometryData,
													const bool													triangles,
													const VkGeometryFlagsKHR									geometryFlags,
													const VkAccelerationStructureTrianglesOpacityMicromapEXT*	opacityGeometryMicromap)
{
	DE_ASSERT(geometryData.size() > 0);
	DE_ASSERT((triangles && geometryData.size() % 3 == 0) || (!triangles && geometryData.size() % 2 == 0));

	// For AABBs, each pair of vertices must form a valid (min, max) box.
	if (!triangles)
		for (size_t posNdx = 0; posNdx < geometryData.size() / 2; ++posNdx)
		{
			DE_ASSERT(geometryData[2 * posNdx].x() <= geometryData[2 * posNdx + 1].x());
			DE_ASSERT(geometryData[2 * posNdx].y() <= geometryData[2 * posNdx + 1].y());
			DE_ASSERT(geometryData[2 * posNdx].z() <= geometryData[2 * posNdx + 1].z());
		}

	de::SharedPtr<RaytracedGeometryBase> geometry = makeRaytracedGeometry(triangles ? VK_GEOMETRY_TYPE_TRIANGLES_KHR : VK_GEOMETRY_TYPE_AABBS_KHR, VK_FORMAT_R32G32B32_SFLOAT, VK_INDEX_TYPE_NONE_KHR);
	for (auto it = begin(geometryData), eit = end(geometryData); it != eit; ++it)
		geometry->addVertex(*it);

	geometry->setGeometryFlags(geometryFlags);
	if (opacityGeometryMicromap)
		geometry->setOpacityMicromap(opacityGeometryMicromap);
	addGeometry(geometry);
}

VkAccelerationStructureBuildSizesInfoKHR BottomLevelAccelerationStructure::getStructureBuildSizes () const
{
	return
	{
		VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,	// VkStructureType	sType;
		DE_NULL,														// const void*		pNext;
		m_structureSize,												// VkDeviceSize		accelerationStructureSize;
		m_updateScratchSize,											// VkDeviceSize		updateScratchSize;
		m_buildScratchSize												// VkDeviceSize		buildScratchSize;
	};
}

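// Total bytes needed to store all geometries' vertex data, with each geometry aligned to 8 bytes.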
VkDeviceSize getVertexBufferSize (const std::vector<de::SharedPtr<RaytracedGeometryBase>>& geometriesData)
{
	DE_ASSERT(geometriesData.size() != 0);
	VkDeviceSize bufferSizeBytes = 0;
	for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
		bufferSizeBytes += deAlignSize(geometriesData[geometryNdx]->getVertexByteSize(), 8);
	return bufferSizeBytes;
}

BufferWithMemory* createVertexBuffer (const DeviceInterface&	vk,
									  const VkDevice			device,
									  Allocator&				allocator,
									  const VkDeviceSize		bufferSizeBytes)
{
	const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
	return new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
}

BufferWithMemory* createVertexBuffer (const DeviceInterface&									vk,
									  const VkDevice											device,
									  Allocator&												allocator,
									  const std::vector<de::SharedPtr<RaytracedGeometryBase>>&	geometriesData)
{
	return createVertexBuffer(vk, device, allocator, getVertexBufferSize(geometriesData));
}

void updateVertexBuffer (const DeviceInterface&										vk,
						 const VkDevice												device,
						 const std::vector<de::SharedPtr<RaytracedGeometryBase>>&	geometriesData,
						 BufferWithMemory*											vertexBuffer,
						 VkDeviceSize												geometriesOffset = 0)
{
	const Allocation&	geometryAlloc	= vertexBuffer->getAllocation();
	deUint8*			bufferStart		= static_cast<deUint8*>(geometryAlloc.getHostPtr());
	VkDeviceSize		bufferOffset	= geometriesOffset;

	for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
	{
		const void*		geometryPtr		= geometriesData[geometryNdx]->getVertexPointer();
		const size_t	geometryPtrSize	= geometriesData[geometryNdx]->getVertexByteSize();

		deMemcpy(&bufferStart[bufferOffset], geometryPtr, geometryPtrSize);

		bufferOffset += deAlignSize(geometryPtrSize, 8);
	}

	// Flush the whole allocation. We could flush only the interesting range, but we'd need to be sure both the offset and size
	// align to VkPhysicalDeviceLimits::nonCoherentAtomSize, which we are not considering. Also note most code uses Coherent memory
	// for the vertex and index buffers, so flushing is actually not needed.
	flushAlloc(vk, device, geometryAlloc);
}

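// Index buffer counterparts of the vertex buffer helpers above; geometries without indices are skipped.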
VkDeviceSize getIndexBufferSize (const std::vector<de::SharedPtr<RaytracedGeometryBase>>& geometriesData)
{
	DE_ASSERT(!geometriesData.empty());

	VkDeviceSize bufferSizeBytes = 0;
	for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
		if (geometriesData[geometryNdx]->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
			bufferSizeBytes += deAlignSize(geometriesData[geometryNdx]->getIndexByteSize(), 8);
	return bufferSizeBytes;
}

BufferWithMemory* createIndexBuffer (const DeviceInterface&	vk,
									 const VkDevice			device,
									 Allocator&				allocator,
									 const VkDeviceSize		bufferSizeBytes)
{
	DE_ASSERT(bufferSizeBytes);
	const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
	return new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
}

BufferWithMemory* createIndexBuffer (const DeviceInterface&										vk,
									 const VkDevice												device,
									 Allocator&													allocator,
									 const std::vector<de::SharedPtr<RaytracedGeometryBase>>&	geometriesData)
{
	const VkDeviceSize bufferSizeBytes = getIndexBufferSize(geometriesData);
	return bufferSizeBytes ? createIndexBuffer(vk, device, allocator, bufferSizeBytes) : nullptr;
}

void updateIndexBuffer (const DeviceInterface&										vk,
						const VkDevice												device,
						const std::vector<de::SharedPtr<RaytracedGeometryBase>>&	geometriesData,
						BufferWithMemory*											indexBuffer,
						VkDeviceSize												geometriesOffset)
{
	const Allocation&	indexAlloc		= indexBuffer->getAllocation();
	deUint8*			bufferStart		= static_cast<deUint8*>(indexAlloc.getHostPtr());
	VkDeviceSize		bufferOffset	= geometriesOffset;

	for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
	{
		if (geometriesData[geometryNdx]->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
		{
			const void*		indexPtr		= geometriesData[geometryNdx]->getIndexPointer();
			const size_t	indexPtrSize	= geometriesData[geometryNdx]->getIndexByteSize();

			deMemcpy(&bufferStart[bufferOffset], indexPtr, indexPtrSize);

			bufferOffset += deAlignSize(indexPtrSize, 8);
		}
	}

	// Flush the whole allocation. We could flush only the interesting range, but we'd need to be sure both the offset and size
	// align to VkPhysicalDeviceLimits::nonCoherentAtomSize, which we are not considering. Also note most code uses Coherent memory
	// for the vertex and index buffers, so flushing is actually not needed.
	flushAlloc(vk, device, indexAlloc);
}

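// VK_KHR_acceleration_structure implementation of the bottom-level acceleration structure interface,
// supporting both host and device builds as well as indirect and deferred build paths.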
class BottomLevelAccelerationStructureKHR : public BottomLevelAccelerationStructure
{
public:
	static deUint32							getRequiredAllocationCount	(void);

											BottomLevelAccelerationStructureKHR		();
											BottomLevelAccelerationStructureKHR		(const BottomLevelAccelerationStructureKHR& other) = delete;
	virtual									~BottomLevelAccelerationStructureKHR	();

	void									setBuildType				(const VkAccelerationStructureBuildTypeKHR buildType) override;
	VkAccelerationStructureBuildTypeKHR		getBuildType				() const override;
	void									setCreateFlags				(const VkAccelerationStructureCreateFlagsKHR createFlags) override;
	void									setCreateGeneric			(bool createGeneric) override;
	void									setBuildFlags				(const VkBuildAccelerationStructureFlagsKHR buildFlags) override;
	void									setBuildWithoutGeometries	(bool buildWithoutGeometries) override;
	void									setBuildWithoutPrimitives	(bool buildWithoutPrimitives) override;
	void									setDeferredOperation		(const bool deferredOperation,
																		 const deUint32 workerThreadCount) override;
	void									setUseArrayOfPointers		(const bool useArrayOfPointers) override;
	void									setIndirectBuildParameters	(const VkBuffer indirectBuffer,
																		 const VkDeviceSize indirectBufferOffset,
																		 const deUint32 indirectBufferStride) override;
	VkBuildAccelerationStructureFlagsKHR	getBuildFlags				() const override;

	void									create						(const DeviceInterface& vk,
																		 const VkDevice device,
																		 Allocator& allocator,
																		 VkDeviceSize structureSize,
																		 VkDeviceAddress deviceAddress = 0u,
																		 const void* pNext = DE_NULL,
																		 const MemoryRequirement& addMemoryRequirement = MemoryRequirement::Any) override;
	void									build						(const DeviceInterface& vk,
																		 const VkDevice device,
																		 const VkCommandBuffer cmdBuffer) override;
	void									copyFrom					(const DeviceInterface& vk,
																		 const VkDevice device,
																		 const VkCommandBuffer cmdBuffer,
																		 BottomLevelAccelerationStructure* accelerationStructure,
																		 bool compactCopy) override;

	void									serialize					(const DeviceInterface& vk,
																		 const VkDevice device,
																		 const VkCommandBuffer cmdBuffer,
																		 SerialStorage* storage) override;
	void									deserialize					(const DeviceInterface& vk,
																		 const VkDevice device,
																		 const VkCommandBuffer cmdBuffer,
																		 SerialStorage* storage) override;

	const VkAccelerationStructureKHR*		getPtr						(void) const override;

protected:
	VkAccelerationStructureBuildTypeKHR		m_buildType;
	VkAccelerationStructureCreateFlagsKHR	m_createFlags;
	bool									m_createGeneric;
	VkBuildAccelerationStructureFlagsKHR	m_buildFlags;
	bool									m_buildWithoutGeometries;
	bool									m_buildWithoutPrimitives;
	bool									m_deferredOperation;
	deUint32								m_workerThreadCount;
	bool									m_useArrayOfPointers;
	de::MovePtr<BufferWithMemory>			m_accelerationStructureBuffer;
	de::MovePtr<BufferWithMemory>			m_vertexBuffer;
	de::MovePtr<BufferWithMemory>			m_indexBuffer;
	de::MovePtr<BufferWithMemory>			m_deviceScratchBuffer;
	de::UniquePtr<std::vector<deUint8>>		m_hostScratchBuffer;
	Move<VkAccelerationStructureKHR>		m_accelerationStructureKHR;
	VkBuffer								m_indirectBuffer;
	VkDeviceSize							m_indirectBufferOffset;
	deUint32								m_indirectBufferStride;

	void									prepareGeometries			(const DeviceInterface& vk,
																		 const VkDevice device,
																		 std::vector<VkAccelerationStructureGeometryKHR>& accelerationStructureGeometriesKHR,
																		 std::vector<VkAccelerationStructureGeometryKHR*>& accelerationStructureGeometriesKHRPointers,
																		 std::vector<VkAccelerationStructureBuildRangeInfoKHR>& accelerationStructureBuildRangeInfoKHR,
																		 std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT>& accelerationStructureGeometryMicromapsEXT,
																		 std::vector<deUint32>& maxPrimitiveCounts,
																		 VkDeviceSize vertexBufferOffset = 0,
																		 VkDeviceSize indexBufferOffset = 0) const;

	virtual BufferWithMemory*				getAccelerationStructureBuffer	() const { return m_accelerationStructureBuffer.get(); }
	virtual BufferWithMemory*				getDeviceScratchBuffer			() const { return m_deviceScratchBuffer.get(); }
	virtual std::vector<deUint8>*			getHostScratchBuffer			() const { return m_hostScratchBuffer.get(); }
	virtual BufferWithMemory*				getVertexBuffer					() const { return m_vertexBuffer.get(); }
	virtual BufferWithMemory*				getIndexBuffer					() const { return m_indexBuffer.get(); }

	virtual VkDeviceSize					getAccelerationStructureBufferOffset	() const { return 0; }
	virtual VkDeviceSize					getDeviceScratchBufferOffset			() const { return 0; }
	virtual VkDeviceSize					getVertexBufferOffset					() const { return 0; }
	virtual VkDeviceSize					getIndexBufferOffset					() const { return 0; }
};

deUint32 BottomLevelAccelerationStructureKHR::getRequiredAllocationCount (void)
{
	/*
		de::MovePtr<BufferWithMemory>	m_geometryBuffer; // but only when m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR
		de::MovePtr<Allocation>			m_accelerationStructureAlloc;
		de::MovePtr<BufferWithMemory>	m_deviceScratchBuffer;
	*/
	return 3u;
}

BottomLevelAccelerationStructureKHR::~BottomLevelAccelerationStructureKHR ()
{
}

BottomLevelAccelerationStructureKHR::BottomLevelAccelerationStructureKHR ()
	: BottomLevelAccelerationStructure	()
	, m_buildType						(VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
	, m_createFlags						(0u)
	, m_createGeneric					(false)
	, m_buildFlags						(0u)
	, m_buildWithoutGeometries			(false)
	, m_buildWithoutPrimitives			(false)
	, m_deferredOperation				(false)
	, m_workerThreadCount				(0)
	, m_useArrayOfPointers				(false)
	, m_accelerationStructureBuffer		(DE_NULL)
	, m_vertexBuffer					(DE_NULL)
	, m_indexBuffer						(DE_NULL)
	, m_deviceScratchBuffer				(DE_NULL)
	, m_hostScratchBuffer				(new std::vector<deUint8>)
	, m_accelerationStructureKHR		()
	, m_indirectBuffer					(DE_NULL)
	, m_indirectBufferOffset			(0)
	, m_indirectBufferStride			(0)
{
}

void BottomLevelAccelerationStructureKHR::setBuildType (const VkAccelerationStructureBuildTypeKHR buildType)
{
	m_buildType = buildType;
}

VkAccelerationStructureBuildTypeKHR BottomLevelAccelerationStructureKHR::getBuildType () const
{
	return m_buildType;
}

void BottomLevelAccelerationStructureKHR::setCreateFlags (const VkAccelerationStructureCreateFlagsKHR createFlags)
{
	m_createFlags = createFlags;
}

void BottomLevelAccelerationStructureKHR::setCreateGeneric (bool createGeneric)
{
	m_createGeneric = createGeneric;
}

void BottomLevelAccelerationStructureKHR::setBuildFlags (const VkBuildAccelerationStructureFlagsKHR buildFlags)
{
	m_buildFlags = buildFlags;
}

void BottomLevelAccelerationStructureKHR::setBuildWithoutGeometries (bool buildWithoutGeometries)
{
	m_buildWithoutGeometries = buildWithoutGeometries;
}

void BottomLevelAccelerationStructureKHR::setBuildWithoutPrimitives (bool buildWithoutPrimitives)
{
	m_buildWithoutPrimitives = buildWithoutPrimitives;
}

void BottomLevelAccelerationStructureKHR::setDeferredOperation (const bool		deferredOperation,
																const deUint32	workerThreadCount)
{
	m_deferredOperation = deferredOperation;
	m_workerThreadCount = workerThreadCount;
}

void BottomLevelAccelerationStructureKHR::setUseArrayOfPointers (const bool useArrayOfPointers)
{
	m_useArrayOfPointers = useArrayOfPointers;
}

void BottomLevelAccelerationStructureKHR::setIndirectBuildParameters (const VkBuffer		indirectBuffer,
																	  const VkDeviceSize	indirectBufferOffset,
																	  const deUint32		indirectBufferStride)
{
	m_indirectBuffer		= indirectBuffer;
	m_indirectBufferOffset	= indirectBufferOffset;
	m_indirectBufferStride	= indirectBufferStride;
}

VkBuildAccelerationStructureFlagsKHR BottomLevelAccelerationStructureKHR::getBuildFlags () const
{
	return m_buildFlags;
}

void BottomLevelAccelerationStructureKHR::create (const DeviceInterface&	vk,
												  const VkDevice			device,
												  Allocator&				allocator,
												  VkDeviceSize				structureSize,
												  VkDeviceAddress			deviceAddress,
												  const void*				pNext,
												  const MemoryRequirement&	addMemoryRequirement)
{
	// The AS may be built from geometries using vkCmdBuildAccelerationStructuresKHR / vkBuildAccelerationStructuresKHR,
	// or it may be copied/compacted/deserialized from another AS (in which case it does not need geometries, but it needs to know its size before creation).
	DE_ASSERT(!m_geometriesData.empty() != !(structureSize == 0)); // logical xor

	if (structureSize == 0)
	{
		std::vector<VkAccelerationStructureGeometryKHR>					accelerationStructureGeometriesKHR;
		std::vector<VkAccelerationStructureGeometryKHR*>				accelerationStructureGeometriesKHRPointers;
		std::vector<VkAccelerationStructureBuildRangeInfoKHR>			accelerationStructureBuildRangeInfoKHR;
		std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT>	accelerationStructureGeometryMicromapsEXT;
		std::vector<deUint32>											maxPrimitiveCounts;
		prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers, accelerationStructureBuildRangeInfoKHR, accelerationStructureGeometryMicromapsEXT, maxPrimitiveCounts);

		const VkAccelerationStructureGeometryKHR*			accelerationStructureGeometriesKHRPointer	= accelerationStructureGeometriesKHR.data();
		const VkAccelerationStructureGeometryKHR* const*	accelerationStructureGeometry				= accelerationStructureGeometriesKHRPointers.data();

		VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR =
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,			// VkStructureType										sType;
			DE_NULL,																	// const void*											pNext;
			VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,							// VkAccelerationStructureTypeKHR						type;
			m_buildFlags,																// VkBuildAccelerationStructureFlagsKHR					flags;
			VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,								// VkBuildAccelerationStructureModeKHR					mode;
			DE_NULL,																	// VkAccelerationStructureKHR							srcAccelerationStructure;
			DE_NULL,																	// VkAccelerationStructureKHR							dstAccelerationStructure;
			static_cast<deUint32>(accelerationStructureGeometriesKHR.size()),			// deUint32												geometryCount;
			m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer,	// const VkAccelerationStructureGeometryKHR*			pGeometries;
			m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL,				// const VkAccelerationStructureGeometryKHR* const*	ppGeometries;
			makeDeviceOrHostAddressKHR(DE_NULL)											// VkDeviceOrHostAddressKHR								scratchData;
		};
		VkAccelerationStructureBuildSizesInfoKHR sizeInfo =
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,	// VkStructureType	sType;
			DE_NULL,														// const void*		pNext;
			0,																// VkDeviceSize		accelerationStructureSize;
			0,																// VkDeviceSize		updateScratchSize;
			0																// VkDeviceSize		buildScratchSize;
		};

		vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);

		m_structureSize		= sizeInfo.accelerationStructureSize;
		m_updateScratchSize	= sizeInfo.updateScratchSize;
		m_buildScratchSize	= sizeInfo.buildScratchSize;
	}
	else
	{
		m_structureSize		= structureSize;
		m_updateScratchSize	= 0u;
		m_buildScratchSize	= 0u;
	}

	{
		const VkBufferCreateInfo	bufferCreateInfo	= makeBufferCreateInfo(m_structureSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
		const MemoryRequirement		memoryRequirement	= addMemoryRequirement | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress;

		try
		{
			m_accelerationStructureBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | memoryRequirement));
		}
		catch (const tcu::NotSupportedError&)
		{
			// Retry without the Cached flag.
			m_accelerationStructureBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, memoryRequirement));
		}
	}

	{
		const VkAccelerationStructureTypeKHR structureType = (m_createGeneric
															  ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
															  : VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
		const VkAccelerationStructureCreateInfoKHR accelerationStructureCreateInfoKHR
		{
			VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,	// VkStructureType							sType;
			pNext,														// const void*								pNext;
			m_createFlags,												// VkAccelerationStructureCreateFlagsKHR	createFlags;
			getAccelerationStructureBuffer()->get(),					// VkBuffer									buffer;
			getAccelerationStructureBufferOffset(),						// VkDeviceSize								offset;
			m_structureSize,											// VkDeviceSize								size;
			structureType,												// VkAccelerationStructureTypeKHR			type;
			deviceAddress												// VkDeviceAddress							deviceAddress;
		};

		m_accelerationStructureKHR = createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);
	}

	if (m_buildScratchSize > 0u)
	{
		if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
		{
			const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(m_buildScratchSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
			m_deviceScratchBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
		}
		else
		{
			m_hostScratchBuffer->resize(static_cast<size_t>(m_buildScratchSize));
		}
	}

	if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR && !m_geometriesData.empty())
	{
		m_vertexBuffer	= de::MovePtr<BufferWithMemory>(createVertexBuffer(vk, device, allocator, m_geometriesData));
		m_indexBuffer	= de::MovePtr<BufferWithMemory>(createIndexBuffer(vk, device, allocator, m_geometriesData));
	}
}

build(const DeviceInterface & vk,const VkDevice device,const VkCommandBuffer cmdBuffer)1168 void BottomLevelAccelerationStructureKHR::build (const DeviceInterface& vk,
1169 const VkDevice device,
1170 const VkCommandBuffer cmdBuffer)
1171 {
1172 DE_ASSERT(!m_geometriesData.empty());
1173 DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1174 DE_ASSERT(m_buildScratchSize != 0);
1175
1176 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1177 {
1178 updateVertexBuffer(vk, device, m_geometriesData, getVertexBuffer(), getVertexBufferOffset());
1179 if(getIndexBuffer() != DE_NULL)
1180 updateIndexBuffer(vk, device, m_geometriesData, getIndexBuffer(), getIndexBufferOffset());
1181 }
1182
1183 {
1184 std::vector<VkAccelerationStructureGeometryKHR> accelerationStructureGeometriesKHR;
1185 std::vector<VkAccelerationStructureGeometryKHR*> accelerationStructureGeometriesKHRPointers;
1186 std::vector<VkAccelerationStructureBuildRangeInfoKHR> accelerationStructureBuildRangeInfoKHR;
1187 std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT> accelerationStructureGeometryMicromapsEXT;
1188 std::vector<deUint32> maxPrimitiveCounts;
1189
1190 prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers,
1191 accelerationStructureBuildRangeInfoKHR, accelerationStructureGeometryMicromapsEXT, maxPrimitiveCounts, getVertexBufferOffset(), getIndexBufferOffset());
1192
1193 const VkAccelerationStructureGeometryKHR* accelerationStructureGeometriesKHRPointer = accelerationStructureGeometriesKHR.data();
1194 const VkAccelerationStructureGeometryKHR* const* accelerationStructureGeometry = accelerationStructureGeometriesKHRPointers.data();
1195 VkDeviceOrHostAddressKHR scratchData = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1196 ? makeDeviceOrHostAddressKHR(vk, device, getDeviceScratchBuffer()->get(), getDeviceScratchBufferOffset())
1197 : makeDeviceOrHostAddressKHR(getHostScratchBuffer()->data());
1198 const deUint32 geometryCount = (m_buildWithoutGeometries
1199 ? 0u
1200 : static_cast<deUint32>(accelerationStructureGeometriesKHR.size()));
1201
1202 VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR =
1203 {
1204 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // VkStructureType sType;
1205 DE_NULL, // const void* pNext;
1206 VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, // VkAccelerationStructureTypeKHR type;
1207 m_buildFlags, // VkBuildAccelerationStructureFlagsKHR flags;
1208 VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR, // VkBuildAccelerationStructureModeKHR mode;
1209 DE_NULL, // VkAccelerationStructureKHR srcAccelerationStructure;
1210 m_accelerationStructureKHR.get(), // VkAccelerationStructureKHR dstAccelerationStructure;
1211 geometryCount, // deUint32 geometryCount;
1212 m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer, // const VkAccelerationStructureGeometryKHR* pGeometries;
1213 m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL, // const VkAccelerationStructureGeometryKHR* const* ppGeometries;
1214 scratchData // VkDeviceOrHostAddressKHR scratchData;
1215 };
1216
1217 VkAccelerationStructureBuildRangeInfoKHR* accelerationStructureBuildRangeInfoKHRPtr = accelerationStructureBuildRangeInfoKHR.data();
1218
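	// Dispatch the build: device builds are recorded into the command buffer (using the
	// indirect variant when an indirect buffer has been set); host builds run immediately,
	// optionally through a deferred operation that is completed by worker threads.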
1219 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1220 {
1221 if (m_indirectBuffer == DE_NULL)
1222 vk.cmdBuildAccelerationStructuresKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
1223 else
1224 {
1225 VkDeviceAddress indirectDeviceAddress = getBufferDeviceAddress(vk, device, m_indirectBuffer, m_indirectBufferOffset);
1226 deUint32* pMaxPrimitiveCounts = maxPrimitiveCounts.data();
1227 vk.cmdBuildAccelerationStructuresIndirectKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, &indirectDeviceAddress, &m_indirectBufferStride, &pMaxPrimitiveCounts);
1228 }
1229 }
1230 else if (!m_deferredOperation)
1231 {
1232 VK_CHECK(vk.buildAccelerationStructuresKHR(device, DE_NULL, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr));
1233 }
1234 else
1235 {
1236 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
1237 const auto deferredOperation = deferredOperationPtr.get();
1238
1239 VkResult result = vk.buildAccelerationStructuresKHR(device, deferredOperation, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
1240
1241 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1242
1243 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1244 }
1245 }
1246
1247 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1248 {
1249 const VkAccessFlags accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
1250 const VkMemoryBarrier memBarrier = makeMemoryBarrier(accessMasks, accessMasks);
1251
1252 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
1253 }
1254 }
1255
1256 void BottomLevelAccelerationStructureKHR::copyFrom (const DeviceInterface& vk,
1257 const VkDevice device,
1258 const VkCommandBuffer cmdBuffer,
1259 BottomLevelAccelerationStructure* accelerationStructure,
1260 bool compactCopy)
1261 {
1262 DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1263 DE_ASSERT(accelerationStructure != DE_NULL);
1264
1265 VkCopyAccelerationStructureInfoKHR copyAccelerationStructureInfo =
1266 {
1267 VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR, // VkStructureType sType;
1268 DE_NULL, // const void* pNext;
1269 *(accelerationStructure->getPtr()), // VkAccelerationStructureKHR src;
1270 *(getPtr()), // VkAccelerationStructureKHR dst;
1271 compactCopy ? VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR : VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR // VkCopyAccelerationStructureModeKHR mode;
1272 };
1273
1274 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1275 {
1276 vk.cmdCopyAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
1277 }
1278 else if (!m_deferredOperation)
1279 {
1280 VK_CHECK(vk.copyAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
1281 }
1282 else
1283 {
1284 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
1285 const auto deferredOperation = deferredOperationPtr.get();
1286
1287 VkResult result = vk.copyAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);
1288
1289 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1290
1291 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1292 }
1293
1294 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1295 {
1296 const VkAccessFlags accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
1297 const VkMemoryBarrier memBarrier = makeMemoryBarrier(accessMasks, accessMasks);
1298
1299 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
1300 }
1301 }
1302
1303 void BottomLevelAccelerationStructureKHR::serialize (const DeviceInterface& vk,
1304 const VkDevice device,
1305 const VkCommandBuffer cmdBuffer,
1306 SerialStorage* storage)
1307 {
1308 DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1309 DE_ASSERT(storage != DE_NULL);
1310
1311 const VkCopyAccelerationStructureToMemoryInfoKHR copyAccelerationStructureInfo =
1312 {
1313 VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_INFO_KHR, // VkStructureType sType;
1314 DE_NULL, // const void* pNext;
1315 *(getPtr()), // VkAccelerationStructureKHR src;
1316 storage->getAddress(vk, device, m_buildType), // VkDeviceOrHostAddressKHR dst;
1317 VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR // VkCopyAccelerationStructureModeKHR mode;
1318 };
1319
1320 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1321 {
1322 vk.cmdCopyAccelerationStructureToMemoryKHR(cmdBuffer, &copyAccelerationStructureInfo);
1323 }
1324 else if (!m_deferredOperation)
1325 {
1326 VK_CHECK(vk.copyAccelerationStructureToMemoryKHR(device, DE_NULL, &copyAccelerationStructureInfo));
1327 }
1328 else
1329 {
1330 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
1331 const auto deferredOperation = deferredOperationPtr.get();
1332
1333 const VkResult result = vk.copyAccelerationStructureToMemoryKHR(device, deferredOperation, &copyAccelerationStructureInfo);
1334
1335 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1336
1337 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1338 }
1339 }
1340
1341 void BottomLevelAccelerationStructureKHR::deserialize (const DeviceInterface& vk,
1342 const VkDevice device,
1343 const VkCommandBuffer cmdBuffer,
1344 SerialStorage* storage)
1345 {
1346 DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1347 DE_ASSERT(storage != DE_NULL);
1348
1349 const VkCopyMemoryToAccelerationStructureInfoKHR copyAccelerationStructureInfo =
1350 {
1351 VK_STRUCTURE_TYPE_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_INFO_KHR, // VkStructureType sType;
1352 DE_NULL, // const void* pNext;
1353 storage->getAddressConst(vk, device, m_buildType), // VkDeviceOrHostAddressConstKHR src;
1354 *(getPtr()), // VkAccelerationStructureKHR dst;
1355 VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR // VkCopyAccelerationStructureModeKHR mode;
1356 };
1357
1358 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1359 {
1360 vk.cmdCopyMemoryToAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
1361 }
1362 else if (!m_deferredOperation)
1363 {
1364 VK_CHECK(vk.copyMemoryToAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
1365 }
1366 else
1367 {
1368 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
1369 const auto deferredOperation = deferredOperationPtr.get();
1370
1371 const VkResult result = vk.copyMemoryToAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);
1372
1373 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1374
1375 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1376 }
1377
1378 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1379 {
1380 const VkAccessFlags accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
1381 const VkMemoryBarrier memBarrier = makeMemoryBarrier(accessMasks, accessMasks);
1382
1383 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
1384 }
1385 }
1386
1387 const VkAccelerationStructureKHR* BottomLevelAccelerationStructureKHR::getPtr (void) const
1388 {
1389 return &m_accelerationStructureKHR.get();
1390 }
1391
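// Translates m_geometriesData into the per-geometry structures consumed by the build:
// geometry descriptions, an array of pointers to them, build-range infos, opacity
// micromap chain structs and max primitive counts. For device builds the vertex and
// index buffer offsets advance per geometry with 8-byte alignment.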
1392 void BottomLevelAccelerationStructureKHR::prepareGeometries (const DeviceInterface& vk,
1393 const VkDevice device,
1394 std::vector<VkAccelerationStructureGeometryKHR>& accelerationStructureGeometriesKHR,
1395 std::vector<VkAccelerationStructureGeometryKHR*>& accelerationStructureGeometriesKHRPointers,
1396 std::vector<VkAccelerationStructureBuildRangeInfoKHR>& accelerationStructureBuildRangeInfoKHR,
1397 std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT>& accelerationStructureGeometryMicromapsEXT,
1398 std::vector<deUint32>& maxPrimitiveCounts,
1399 VkDeviceSize vertexBufferOffset,
1400 VkDeviceSize indexBufferOffset) const
1401 {
1402 accelerationStructureGeometriesKHR.resize(m_geometriesData.size());
1403 accelerationStructureGeometriesKHRPointers.resize(m_geometriesData.size());
1404 accelerationStructureBuildRangeInfoKHR.resize(m_geometriesData.size());
1405 accelerationStructureGeometryMicromapsEXT.resize(m_geometriesData.size());
1406 maxPrimitiveCounts.resize(m_geometriesData.size());
1407
1408 for (size_t geometryNdx = 0; geometryNdx < m_geometriesData.size(); ++geometryNdx)
1409 {
1410 const de::SharedPtr<RaytracedGeometryBase>& geometryData = m_geometriesData[geometryNdx];
1411 VkDeviceOrHostAddressConstKHR vertexData, indexData;
1412 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1413 {
1414 if (getVertexBuffer() != DE_NULL)
1415 {
1416 vertexData = makeDeviceOrHostAddressConstKHR(vk, device, getVertexBuffer()->get(), vertexBufferOffset);
1417 if (m_indirectBuffer == DE_NULL)
1418 {
1419 vertexBufferOffset += deAlignSize(geometryData->getVertexByteSize(), 8);
1420 }
1421 }
1422 else
1423 vertexData = makeDeviceOrHostAddressConstKHR(DE_NULL);
1424
1425 if (getIndexBuffer() != DE_NULL && geometryData->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
1426 {
1427 indexData = makeDeviceOrHostAddressConstKHR(vk, device, getIndexBuffer()->get(), indexBufferOffset);
1428 indexBufferOffset += deAlignSize(geometryData->getIndexByteSize(), 8);
1429 }
1430 else
1431 indexData = makeDeviceOrHostAddressConstKHR(DE_NULL);
1432 }
1433 else
1434 {
1435 vertexData = makeDeviceOrHostAddressConstKHR(geometryData->getVertexPointer());
1436 if (geometryData->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
1437 indexData = makeDeviceOrHostAddressConstKHR(geometryData->getIndexPointer());
1438 else
1439 indexData = makeDeviceOrHostAddressConstKHR(DE_NULL);
1440 }
1441
1442 VkAccelerationStructureGeometryTrianglesDataKHR accelerationStructureGeometryTrianglesDataKHR =
1443 {
1444 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, // VkStructureType sType;
1445 DE_NULL, // const void* pNext;
1446 geometryData->getVertexFormat(), // VkFormat vertexFormat;
1447 vertexData, // VkDeviceOrHostAddressConstKHR vertexData;
1448 geometryData->getVertexStride(), // VkDeviceSize vertexStride;
1449 static_cast<deUint32>(geometryData->getVertexCount()), // uint32_t maxVertex;
1450 geometryData->getIndexType(), // VkIndexType indexType;
1451 indexData, // VkDeviceOrHostAddressConstKHR indexData;
1452 makeDeviceOrHostAddressConstKHR(DE_NULL), // VkDeviceOrHostAddressConstKHR transformData;
1453 };
1454
1455 if (geometryData->getHasOpacityMicromap())
1456 accelerationStructureGeometryTrianglesDataKHR.pNext = &geometryData->getOpacityMicromap();
1457
1458 const VkAccelerationStructureGeometryAabbsDataKHR accelerationStructureGeometryAabbsDataKHR =
1459 {
1460 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR, // VkStructureType sType;
1461 DE_NULL, // const void* pNext;
1462 vertexData, // VkDeviceOrHostAddressConstKHR data;
1463 geometryData->getAABBStride() // VkDeviceSize stride;
1464 };
1465 const VkAccelerationStructureGeometryDataKHR geometry = (geometryData->isTrianglesType())
1466 ? makeVkAccelerationStructureGeometryDataKHR(accelerationStructureGeometryTrianglesDataKHR)
1467 : makeVkAccelerationStructureGeometryDataKHR(accelerationStructureGeometryAabbsDataKHR);
1468 const VkAccelerationStructureGeometryKHR accelerationStructureGeometryKHR =
1469 {
1470 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // VkStructureType sType;
1471 DE_NULL, // const void* pNext;
1472 geometryData->getGeometryType(), // VkGeometryTypeKHR geometryType;
1473 geometry, // VkAccelerationStructureGeometryDataKHR geometry;
1474 geometryData->getGeometryFlags() // VkGeometryFlagsKHR flags;
1475 };
1476
1477 const deUint32 primitiveCount = (m_buildWithoutPrimitives ? 0u : geometryData->getPrimitiveCount());
1478
1479 const VkAccelerationStructureBuildRangeInfoKHR accelerationStructureBuildRangeInfosKHR =
1480 {
1481 primitiveCount, // deUint32 primitiveCount;
1482 0, // deUint32 primitiveOffset;
1483 0, // deUint32 firstVertex;
1484 0 // deUint32 firstTransform;
1485 };
1486
1487 accelerationStructureGeometriesKHR[geometryNdx] = accelerationStructureGeometryKHR;
1488 accelerationStructureGeometriesKHRPointers[geometryNdx] = &accelerationStructureGeometriesKHR[geometryNdx];
1489 accelerationStructureBuildRangeInfoKHR[geometryNdx] = accelerationStructureBuildRangeInfosKHR;
1490 maxPrimitiveCounts[geometryNdx] = geometryData->getPrimitiveCount();
1491 }
1492 }
1493
1494 deUint32 BottomLevelAccelerationStructure::getRequiredAllocationCount (void)
1495 {
1496 return BottomLevelAccelerationStructureKHR::getRequiredAllocationCount();
1497 }
1498
1499 void BottomLevelAccelerationStructure::createAndBuild (const DeviceInterface& vk,
1500 const VkDevice device,
1501 const VkCommandBuffer cmdBuffer,
1502 Allocator& allocator,
1503 VkDeviceAddress deviceAddress)
1504 {
1505 create(vk, device, allocator, 0u, deviceAddress);
1506 build(vk, device, cmdBuffer);
1507 }
1508
1509 void BottomLevelAccelerationStructure::createAndCopyFrom (const DeviceInterface& vk,
1510 const VkDevice device,
1511 const VkCommandBuffer cmdBuffer,
1512 Allocator& allocator,
1513 BottomLevelAccelerationStructure* accelerationStructure,
1514 VkDeviceSize compactCopySize,
1515 VkDeviceAddress deviceAddress)
1516 {
1517 DE_ASSERT(accelerationStructure != NULL);
1518 VkDeviceSize copiedSize = compactCopySize > 0u ? compactCopySize : accelerationStructure->getStructureBuildSizes().accelerationStructureSize;
1519 DE_ASSERT(copiedSize != 0u);
1520
1521 create(vk, device, allocator, copiedSize, deviceAddress);
1522 copyFrom(vk, device, cmdBuffer, accelerationStructure, compactCopySize > 0u);
1523 }
1524
1525 void BottomLevelAccelerationStructure::createAndDeserializeFrom (const DeviceInterface& vk,
1526 const VkDevice device,
1527 const VkCommandBuffer cmdBuffer,
1528 Allocator& allocator,
1529 SerialStorage* storage,
1530 VkDeviceAddress deviceAddress )
1531 {
1532 DE_ASSERT(storage != NULL);
1533 DE_ASSERT(storage->getStorageSize() >= SerialStorage::SERIAL_STORAGE_SIZE_MIN);
1534 create(vk, device, allocator, storage->getDeserializedSize(), deviceAddress);
1535 deserialize(vk, device, cmdBuffer, storage);
1536 }
1537
1538 de::MovePtr<BottomLevelAccelerationStructure> makeBottomLevelAccelerationStructure ()
1539 {
1540 return de::MovePtr<BottomLevelAccelerationStructure>(new BottomLevelAccelerationStructureKHR);
1541 }
1542
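// Typical usage, as a minimal sketch (assumes a recording command buffer `cmdBuffer`
// and the geometry setters declared in vkRayTracingUtil.hpp):
//
//   de::MovePtr<BottomLevelAccelerationStructure> blas = makeBottomLevelAccelerationStructure();
//   blas->setGeometryCount(1u);
//   blas->addGeometry(triangleVertices, true /* triangles */); // std::vector<tcu::Vec3>
//   blas->createAndBuild(vk, device, cmdBuffer, allocator);
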
1543 // Forward declaration
1544 struct BottomLevelAccelerationStructurePoolImpl;
1545
1546 class BottomLevelAccelerationStructurePoolMember : public BottomLevelAccelerationStructureKHR
1547 {
1548 public:
1549 friend class BottomLevelAccelerationStructurePool;
1550
1551 BottomLevelAccelerationStructurePoolMember (BottomLevelAccelerationStructurePoolImpl& pool);
1552 BottomLevelAccelerationStructurePoolMember (const BottomLevelAccelerationStructurePoolMember&) = delete;
1553 BottomLevelAccelerationStructurePoolMember (BottomLevelAccelerationStructurePoolMember&&) = delete;
1554 virtual ~BottomLevelAccelerationStructurePoolMember () = default;
1555
1556 virtual void create (const DeviceInterface&,
1557 const VkDevice,
1558 Allocator&,
1559 VkDeviceSize,
1560 VkDeviceAddress,
1561 const void*,
1562 const MemoryRequirement&) override
1563 {
1564 DE_ASSERT(0); // Intentionally disabled: pool members are created and sized by the pool itself
1565 }
1566 virtual auto computeBuildSize (const DeviceInterface& vk,
1567 const VkDevice device,
1568 const VkDeviceSize strSize) const
1569 // accStrSize, updateScratch, buildScratch, vertexSize, indexSize
1570 -> std::tuple<VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize>;
1571 protected:
1572 struct Info;
1573 virtual void preCreateSetSizesAndOffsets (const Info& info,
1574 const VkDeviceSize accStrSize,
1575 const VkDeviceSize updateScratchSize,
1576 const VkDeviceSize buildScratchSize);
1577 virtual void createAccellerationStructure (const DeviceInterface& vk,
1578 const VkDevice device,
1579 VkDeviceAddress deviceAddress);
1580
1581 virtual BufferWithMemory* getAccelerationStructureBuffer () const override;
1582 virtual BufferWithMemory* getDeviceScratchBuffer () const override;
1583 virtual std::vector<deUint8>* getHostScratchBuffer () const override;
1584 virtual BufferWithMemory* getVertexBuffer () const override;
1585 virtual BufferWithMemory* getIndexBuffer () const override;
1586
1587 virtual VkDeviceSize getAccelerationStructureBufferOffset () const override { return m_info.accStrOffset; }
1588 virtual VkDeviceSize getDeviceScratchBufferOffset () const override { return m_info.buildScratchBuffOffset; }
1589 virtual VkDeviceSize getVertexBufferOffset () const override { return m_info.vertBuffOffset; }
1590 virtual VkDeviceSize getIndexBufferOffset () const override { return m_info.indexBuffOffset; }
1591
1592 BottomLevelAccelerationStructurePoolImpl& m_pool;
1593
1594 struct Info
1595 {
1596 deUint32 accStrIndex;
1597 VkDeviceSize accStrOffset;
1598 deUint32 vertBuffIndex;
1599 VkDeviceSize vertBuffOffset;
1600 deUint32 indexBuffIndex;
1601 VkDeviceSize indexBuffOffset;
1602 deUint32 buildScratchBuffIndex;
1603 VkDeviceSize buildScratchBuffOffset;
1604 } m_info;
1605 };
1606
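// Helpers for the pool implementation below: negz() yields the all-bits-set value of
// its argument's type, which the pool uses as a "no index / no size limit" sentinel,
// and isnegz() tests for that sentinel.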
1607 template<class X> inline X negz (const X&)
1608 {
1609 return (~static_cast<X>(0));
1610 }
1611 template<class X> inline bool isnegz (const X& x)
1612 {
1613 return x == negz(x);
1614 }
1615 template<class Y> inline auto make_unsigned(const Y& y) -> typename std::make_unsigned<Y>::type
1616 {
1617 return static_cast<typename std::make_unsigned<Y>::type>(y);
1618 }
1619
1620 BottomLevelAccelerationStructurePoolMember::BottomLevelAccelerationStructurePoolMember (BottomLevelAccelerationStructurePoolImpl& pool)
1621 : m_pool (pool)
1622 , m_info {}
1623 {
1624 }
1625
1626 struct BottomLevelAccelerationStructurePoolImpl
1627 {
1628 BottomLevelAccelerationStructurePoolImpl (BottomLevelAccelerationStructurePoolImpl&&) = delete;
1629 BottomLevelAccelerationStructurePoolImpl (const BottomLevelAccelerationStructurePoolImpl&) = delete;
1630 BottomLevelAccelerationStructurePoolImpl (BottomLevelAccelerationStructurePool& pool);
1631
1632 BottomLevelAccelerationStructurePool& m_pool;
1633 std::vector<de::SharedPtr<BufferWithMemory>> m_accellerationStructureBuffers;
1634 de::SharedPtr<BufferWithMemory> m_deviceScratchBuffer;
1635 de::UniquePtr<std::vector<deUint8>> m_hostScratchBuffer;
1636 std::vector<de::SharedPtr<BufferWithMemory>> m_vertexBuffers;
1637 std::vector<de::SharedPtr<BufferWithMemory>> m_indexBuffers;
1638 };
1639 BottomLevelAccelerationStructurePoolImpl::BottomLevelAccelerationStructurePoolImpl (BottomLevelAccelerationStructurePool& pool)
1640 : m_pool (pool)
1641 , m_accellerationStructureBuffers ()
1642 , m_deviceScratchBuffer ()
1643 , m_hostScratchBuffer (new std::vector<deUint8>)
1644 , m_vertexBuffers ()
1645 , m_indexBuffers ()
1646 {
1647 }
1648 BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getAccelerationStructureBuffer () const
1649 {
1650 BufferWithMemory* result = nullptr;
1651 if (m_pool.m_accellerationStructureBuffers.size())
1652 {
1653 DE_ASSERT(!isnegz(m_info.accStrIndex));
1654 result = m_pool.m_accellerationStructureBuffers[m_info.accStrIndex].get();
1655 }
1656 return result;
1657 }
1658 BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getDeviceScratchBuffer () const
1659 {
1660 DE_ASSERT(m_info.buildScratchBuffIndex == 0);
1661 return m_pool.m_deviceScratchBuffer.get();
1662 }
1663 std::vector<deUint8>* BottomLevelAccelerationStructurePoolMember::getHostScratchBuffer () const
1664 {
1665 return this->m_buildScratchSize ? m_pool.m_hostScratchBuffer.get() : nullptr;
1666 }
1667
1668 BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getVertexBuffer () const
1669 {
1670 BufferWithMemory* result = nullptr;
1671 if (m_pool.m_vertexBuffers.size())
1672 {
1673 DE_ASSERT(!isnegz(m_info.vertBuffIndex));
1674 result = m_pool.m_vertexBuffers[m_info.vertBuffIndex].get();
1675 }
1676 return result;
1677 }
1678 BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getIndexBuffer () const
1679 {
1680 BufferWithMemory* result = nullptr;
1681 if (m_pool.m_indexBuffers.size())
1682 {
1683 DE_ASSERT(!isnegz(m_info.indexBuffIndex));
1684 result = m_pool.m_indexBuffers[m_info.indexBuffIndex].get();
1685 }
1686 return result;
1687 }
1688
1689 struct BottomLevelAccelerationStructurePool::Impl : BottomLevelAccelerationStructurePoolImpl
1690 {
1691 friend class BottomLevelAccelerationStructurePool;
1692 friend class BottomLevelAccelerationStructurePoolMember;
1693
1694 Impl (BottomLevelAccelerationStructurePool& pool)
1695 : BottomLevelAccelerationStructurePoolImpl(pool) { }
1696 };
1697
1698 BottomLevelAccelerationStructurePool::BottomLevelAccelerationStructurePool ()
1699 : m_batchStructCount (4)
1700 , m_batchGeomCount (0)
1701 , m_infos ()
1702 , m_structs ()
1703 , m_createOnce (false)
1704 , m_tryCachedMemory (true)
1705 , m_structsBuffSize (0)
1706 , m_updatesScratchSize (0)
1707 , m_buildsScratchSize (0)
1708 , m_verticesSize (0)
1709 , m_indicesSize (0)
1710 , m_impl (new Impl(*this))
1711 {
1712 }
1713
1714 BottomLevelAccelerationStructurePool::~BottomLevelAccelerationStructurePool()
1715 {
1716 delete m_impl;
1717 }
1718
1719 void BottomLevelAccelerationStructurePool::batchStructCount (const deUint32& value)
1720 {
1721 DE_ASSERT(value >= 1); m_batchStructCount = value;
1722 }
1723
1724 auto BottomLevelAccelerationStructurePool::add (VkDeviceSize structureSize,
1725 VkDeviceAddress deviceAddress) -> BottomLevelAccelerationStructurePool::BlasPtr
1726 {
1727 // Prevent this method from being called after batchCreate(...) has already been called.
1728 if (m_createOnce) DE_ASSERT(0);
1729
1730 auto blas = new BottomLevelAccelerationStructurePoolMember(*m_impl);
1731 m_infos.push_back({structureSize, deviceAddress});
1732 m_structs.emplace_back(blas);
1733 return m_structs.back();
1734 }
1735
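// Greedy estimate of how many pool members can share one buffer of each kind
// (acceleration structure, scratch, vertex, index) without exceeding maxBufferSize.
// For every member the four sizes are accumulated, and the longest run that still
// fits determines the batch count for that buffer kind.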
1736 void adjustBatchCount (const DeviceInterface& vkd,
1737 const VkDevice device,
1738 const std::vector<BottomLevelAccelerationStructurePool::BlasPtr>& structs,
1739 const std::vector<BottomLevelAccelerationStructurePool::BlasInfo>& infos,
1740 const VkDeviceSize maxBufferSize,
1741 deUint32 (&result)[4])
1742 {
1743 tcu::Vector<VkDeviceSize, 4> sizes(0);
1744 tcu::Vector<VkDeviceSize, 4> sums(0);
1745 tcu::Vector<deUint32, 4> tmps(0);
1746 tcu::Vector<deUint32, 4> batches(0);
1747
1748 VkDeviceSize updateScratchSize = 0; static_cast<void>(updateScratchSize); // not used yet; reserved for a future implementation
1749
1750 auto updateIf = [&](deUint32 c)
1751 {
1752 if (sums[c] + sizes[c] <= maxBufferSize)
1753 {
1754 sums[c] += sizes[c];
1755 tmps[c] += 1;
1756
1757 batches[c] = std::max(tmps[c], batches[c]);
1758 }
1759 else
1760 {
1761 sums[c] = 0;
1762 tmps[c] = 0;
1763 }
1764 };
1765
1766 const deUint32 maxIter = static_cast<deUint32>(structs.size());
1767 for (deUint32 i = 0; i < maxIter; ++i)
1768 {
1769 auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(structs[i].get());
1770 std::tie(sizes[0], updateScratchSize, sizes[1], sizes[2], sizes[3]) = str.computeBuildSize(vkd, device, infos[i].structureSize);
1771
1772 updateIf(0);
1773 updateIf(1);
1774 updateIf(2);
1775 updateIf(3);
1776 }
1777
1778 result[0] = std::max(batches[0], 1u);
1779 result[1] = std::max(batches[1], 1u);
1780 result[2] = std::max(batches[2], 1u);
1781 result[3] = std::max(batches[3], 1u);
1782 }
1783
1784 size_t BottomLevelAccelerationStructurePool::getAllocationCount () const
1785 {
1786 return m_impl->m_accellerationStructureBuffers.size()
1787 + m_impl->m_vertexBuffers.size()
1788 + m_impl->m_indexBuffers.size()
1789 + 1 /* for scratch buffer */;
1790 }
1791
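// Predicts the number of discrete allocations that batchCreateAdjust() would make for
// the given maximum buffer size, by simulating the same batching with 256-byte-aligned
// structure/scratch sizes and 8-byte-aligned vertex/index sizes.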
1792 size_t BottomLevelAccelerationStructurePool::getAllocationCount (const DeviceInterface& vk,
1793 const VkDevice device,
1794 const VkDeviceSize maxBufferSize) const
1795 {
1796 DE_ASSERT(m_structs.size() != 0);
1797
1798 std::map<deUint32, VkDeviceSize> accStrSizes;
1799 std::map<deUint32, VkDeviceSize> vertBuffSizes;
1800 std::map<deUint32, VkDeviceSize> indexBuffSizes;
1801 std::map<deUint32, VkDeviceSize> scratchBuffSizes;
1802
1803 const deUint32 allStructsCount = structCount();
1804
1805 deUint32 batchStructCount = m_batchStructCount;
1806 deUint32 batchScratchCount = m_batchStructCount;
1807 deUint32 batchVertexCount = m_batchGeomCount ? m_batchGeomCount : m_batchStructCount;
1808 deUint32 batchIndexCount = batchVertexCount;
1809
1810 if (!isnegz(maxBufferSize))
1811 {
1812 deUint32 batches[4];
1813 adjustBatchCount(vk, device, m_structs, m_infos, maxBufferSize, batches);
1814 batchStructCount = batches[0];
1815 batchScratchCount = batches[1];
1816 batchVertexCount = batches[2];
1817 batchIndexCount = batches[3];
1818 }
1819
1820 deUint32 iStr = 0;
1821 deUint32 iScratch = 0;
1822 deUint32 iVertex = 0;
1823 deUint32 iIndex = 0;
1824
1825 VkDeviceSize strSize = 0;
1826 VkDeviceSize updateScratchSize = 0;
1827 VkDeviceSize buildScratchSize = 0;
1828 VkDeviceSize vertexSize = 0;
1829 VkDeviceSize indexSize = 0;
1830
1831 for (; iStr < allStructsCount; ++iStr)
1832 {
1833 auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[iStr].get());
1834 std::tie(strSize, updateScratchSize, buildScratchSize, vertexSize, indexSize) = str.computeBuildSize(vk, device, m_infos[iStr].structureSize);
1835
1836 {
1837 const VkDeviceSize alignedStrSize = deAlign64(strSize, 256);
1838 const deUint32 accStrIndex = (iStr / batchStructCount);
1839 accStrSizes[accStrIndex] += alignedStrSize;
1840 }
1841
1842 if (buildScratchSize != 0)
1843 {
1844 const VkDeviceSize alignedBuildScratchSize = deAlign64(buildScratchSize, 256);
1845 const deUint32 scratchBuffIndex = (iScratch / batchScratchCount);
1846 scratchBuffSizes[scratchBuffIndex] += alignedBuildScratchSize;
1847 iScratch += 1;
1848 }
1849
1850 if (vertexSize != 0)
1851 {
1852 const VkDeviceSize alignedVertBuffSize = deAlign64(vertexSize, 8);
1853 const deUint32 vertBuffIndex = (iVertex / batchVertexCount);
1854 vertBuffSizes[vertBuffIndex] += alignedVertBuffSize;
1855 iVertex += 1;
1856 }
1857
1858 if (indexSize != 0)
1859 {
1860 const VkDeviceSize alignedIndexBuffSize = deAlign64(indexSize, 8);
1861 const deUint32 indexBuffIndex = (iIndex / batchIndexCount);
1862 indexBuffSizes[indexBuffIndex] += alignedIndexBuffSize;
1863 iIndex += 1;
1864 }
1865 }
1866
1867 return accStrSizes.size()
1868 + vertBuffSizes.size()
1869 + indexBuffSizes.size()
1870 + scratchBuffSizes.size();
1871 }
1872
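// Returns the cumulative sizes as (structures, build scratch, vertices, indices).
// After batchCreate*() has run, the cached totals are returned directly; otherwise
// the sizes are recomputed by querying each member.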
1873 tcu::Vector<VkDeviceSize, 4> BottomLevelAccelerationStructurePool::getAllocationSizes (const DeviceInterface& vk,
1874 const VkDevice device) const
1875 {
1876 if (m_structsBuffSize)
1877 {
1878 return tcu::Vector<VkDeviceSize, 4>(m_structsBuffSize, m_buildsScratchSize, m_verticesSize, m_indicesSize);
1879 }
1880
1881 VkDeviceSize strSize = 0;
1882 VkDeviceSize updateScratchSize = 0; static_cast<void>(updateScratchSize); // not used yet; reserved for a future implementation
1883 VkDeviceSize buildScratchSize = 0;
1884 VkDeviceSize vertexSize = 0;
1885 VkDeviceSize indexSize = 0;
1886 VkDeviceSize sumStrSize = 0;
1887 VkDeviceSize sumUpdateScratchSize = 0; static_cast<void>(sumUpdateScratchSize); // not used yet; reserved for a future implementation
1888 VkDeviceSize sumBuildScratchSize = 0;
1889 VkDeviceSize sumVertexSize = 0;
1890 VkDeviceSize sumIndexSize = 0;
1891 for (size_t i = 0; i < structCount(); ++i)
1892 {
1893 auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[i].get());
1894 std::tie(strSize, updateScratchSize, buildScratchSize, vertexSize, indexSize) = str.computeBuildSize(vk, device, m_infos[i].structureSize);
1895 sumStrSize += deAlign64(strSize, 256);
1896 //sumUpdateScratchSize += deAlign64(updateScratchSize, 256); // not used yet; reserved for a future implementation
1897 sumBuildScratchSize += deAlign64(buildScratchSize, 256);
1898 sumVertexSize += deAlign64(vertexSize, 8);
1899 sumIndexSize += deAlign64(indexSize, 8);
1900 }
1901 return tcu::Vector<VkDeviceSize, 4>(sumStrSize, sumBuildScratchSize, sumVertexSize, sumIndexSize);
1902 }
1903
1904 void BottomLevelAccelerationStructurePool::batchCreate (const DeviceInterface& vkd,
1905 const VkDevice device,
1906 Allocator& allocator)
1907 {
1908 batchCreateAdjust(vkd, device, allocator, negz<VkDeviceSize>(0));
1909 }
1910
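// Creates all pooled buffers and the members' acceleration structures in one pass:
// sizes and offsets are computed per member, the batched buffers are allocated
// (trying cached memory for the structure buffers first), a single scratch allocation
// is sized to the largest build scratch requirement, and finally each member's
// acceleration structure is created on top of its batched buffer. Passing the negz
// sentinel as maxBufferSize means "no per-buffer size limit".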
1911 void BottomLevelAccelerationStructurePool::batchCreateAdjust (const DeviceInterface& vkd,
1912 const VkDevice device,
1913 Allocator& allocator,
1914 const VkDeviceSize maxBufferSize)
1915 {
1916 // Prevent this method from being called more than once.
1917 if (m_createOnce) DE_ASSERT(0);
1918
1919 m_createOnce = true;
1920 DE_ASSERT(m_structs.size() != 0);
1921
1922 auto createAccellerationStructureBuffer = [&](VkDeviceSize bufferSize) -> typename std::add_pointer<BufferWithMemory>::type
1923 {
1924 BufferWithMemory* res = nullptr;
1925 const VkBufferCreateInfo bci = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
1926
1927 if (m_tryCachedMemory) try
1928 {
1929 res = new BufferWithMemory(vkd, device, allocator, bci, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
1930 }
1931 catch (const tcu::NotSupportedError&)
1932 {
1933 res = nullptr;
1934 }
1935
1936 return (nullptr != res)
1937 ? res
1938 : (new BufferWithMemory(vkd, device, allocator, bci, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
1939 };
1940
1941 auto createDeviceScratchBuffer = [&](VkDeviceSize bufferSize) -> de::SharedPtr<BufferWithMemory>
1942 {
1943 const VkBufferCreateInfo bci = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
1944 BufferWithMemory* p = new BufferWithMemory(vkd, device, allocator, bci, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
1945 return de::SharedPtr<BufferWithMemory>(p);
1946 };
1947
1948 std::map<deUint32, VkDeviceSize> accStrSizes;
1949 std::map<deUint32, VkDeviceSize> vertBuffSizes;
1950 std::map<deUint32, VkDeviceSize> indexBuffSizes;
1951
1952 const deUint32 allStructsCount = structCount();
1953 deUint32 iterKey = 0;
1954
1955 deUint32 batchStructCount = m_batchStructCount;
1956 deUint32 batchVertexCount = m_batchGeomCount ? m_batchGeomCount : m_batchStructCount;
1957 deUint32 batchIndexCount = batchVertexCount;
1958
1959 if (!isnegz(maxBufferSize))
1960 {
1961 deUint32 batches[4];
1962 adjustBatchCount(vkd, device, m_structs, m_infos, maxBufferSize, batches);
1963 batchStructCount = batches[0];
1964 // batches[1]: batchScratchCount
1965 batchVertexCount = batches[2];
1966 batchIndexCount = batches[3];
1967 }
1968
1969 deUint32 iStr = 0;
1970 deUint32 iVertex = 0;
1971 deUint32 iIndex = 0;
1972
1973 VkDeviceSize strSize = 0;
1974 VkDeviceSize updateScratchSize = 0;
1975 VkDeviceSize buildScratchSize = 0;
1976 VkDeviceSize maxBuildScratchSize = 0;
1977 VkDeviceSize vertexSize = 0;
1978 VkDeviceSize indexSize = 0;
1979
1980 VkDeviceSize strOffset = 0;
1981 VkDeviceSize vertexOffset = 0;
1982 VkDeviceSize indexOffset = 0;
1983
1984 deUint32 hostStructCount = 0;
1985 deUint32 deviceStructCount = 0;
1986
1987 for (; iStr < allStructsCount; ++iStr)
1988 {
1989 BottomLevelAccelerationStructurePoolMember::Info info{};
1990 auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[iStr].get());
1991 std::tie(strSize, updateScratchSize, buildScratchSize, vertexSize, indexSize) = str.computeBuildSize(vkd, device, m_infos[iStr].structureSize);
1992
1993 ++(str.getBuildType() == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR ? hostStructCount : deviceStructCount);
1994
1995 {
1996 const VkDeviceSize alignedStrSize = deAlign64(strSize, 256);
1997 const deUint32 accStrIndex = (iStr / batchStructCount);
1998 if (iStr != 0 && (iStr % batchStructCount) == 0)
1999 {
2000 strOffset = 0;
2001 }
2002
2003 info.accStrIndex = accStrIndex;
2004 info.accStrOffset = strOffset;
2005 accStrSizes[accStrIndex] += alignedStrSize;
2006 strOffset += alignedStrSize;
2007 m_structsBuffSize += alignedStrSize;
2008 }
2009
2010 if (buildScratchSize != 0)
2011 {
2012 maxBuildScratchSize = std::max(maxBuildScratchSize, make_unsigned(deAlign64(buildScratchSize, 256u)));
2013
2014 info.buildScratchBuffIndex = 0;
2015 info.buildScratchBuffOffset = 0;
2016 }
2017
2018 if (vertexSize != 0)
2019 {
2020 const VkDeviceSize alignedVertBuffSize = deAlign64(vertexSize, 8);
2021 const deUint32 vertBuffIndex = (iVertex / batchVertexCount);
2022 if (iVertex != 0 && (iVertex % batchVertexCount) == 0)
2023 {
2024 vertexOffset = 0;
2025 }
2026
2027 info.vertBuffIndex = vertBuffIndex;
2028 info.vertBuffOffset = vertexOffset;
2029 vertBuffSizes[vertBuffIndex] += alignedVertBuffSize;
2030 vertexOffset += alignedVertBuffSize;
2031 m_verticesSize += alignedVertBuffSize;
2032 iVertex += 1;
2033 }
2034
2035 if (indexSize != 0)
2036 {
2037 const VkDeviceSize alignedIndexBuffSize = deAlign64(indexSize, 8);
2038 const deUint32 indexBuffIndex = (iIndex / batchIndexCount);
2039 if (iIndex != 0 && (iIndex % batchIndexCount) == 0)
2040 {
2041 indexOffset = 0;
2042 }
2043
2044 info.indexBuffIndex = indexBuffIndex;
2045 info.indexBuffOffset = indexOffset;
2046 indexBuffSizes[indexBuffIndex] += alignedIndexBuffSize;
2047 indexOffset += alignedIndexBuffSize;
2048 m_indicesSize += alignedIndexBuffSize;
2049 iIndex += 1;
2050 }
2051
2052 str.preCreateSetSizesAndOffsets(info, strSize, updateScratchSize, buildScratchSize);
2053 }
2054
2055 for (iterKey = 0; iterKey < static_cast<deUint32>(accStrSizes.size()); ++iterKey)
2056 {
2057 m_impl->m_accellerationStructureBuffers.emplace_back(createAccellerationStructureBuffer(accStrSizes.at(iterKey)));
2058 }
2059 for (iterKey = 0; iterKey < static_cast<deUint32>(vertBuffSizes.size()); ++iterKey)
2060 {
2061 m_impl->m_vertexBuffers.emplace_back(createVertexBuffer(vkd, device, allocator, vertBuffSizes.at(iterKey)));
2062 }
2063 for (iterKey = 0; iterKey < static_cast<deUint32>(indexBuffSizes.size()); ++iterKey)
2064 {
2065 m_impl->m_indexBuffers.emplace_back(createIndexBuffer(vkd, device, allocator, indexBuffSizes.at(iterKey)));
2066 }
2067
2068 if (maxBuildScratchSize)
2069 {
2070 if (hostStructCount) m_impl->m_hostScratchBuffer->resize(static_cast<size_t>(maxBuildScratchSize));
2071 if (deviceStructCount) m_impl->m_deviceScratchBuffer = createDeviceScratchBuffer(maxBuildScratchSize);
2072
2073 m_buildsScratchSize = maxBuildScratchSize;
2074 }
2075
2076 for (iterKey = 0; iterKey < allStructsCount; ++iterKey)
2077 {
2078 auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[iterKey].get());
2079 str.createAccellerationStructure(vkd, device, m_infos[iterKey].deviceAddress);
2080 }
2081 }
2082
2083 void BottomLevelAccelerationStructurePool::batchBuild (const DeviceInterface& vk,
2084 const VkDevice device,
2085 VkCommandBuffer cmdBuffer)
2086 {
2087 for (const auto& str : m_structs)
2088 {
2089 str->build(vk, device, cmdBuffer);
2090 }
2091 }
2092
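// Builds all pool members, submitting device builds in chunks: host-built structures
// build immediately on the CPU, while device-built ones are accumulated and flushed
// in one command buffer per chunk of up to `limit` structures, waiting for each
// submission to finish. The watchdog, when provided, is touched every
// WATCHDOG_INTERVAL structures to keep long-running tests alive.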
2093 void BottomLevelAccelerationStructurePool::batchBuild (const DeviceInterface& vk,
2094 const VkDevice device,
2095 VkCommandPool cmdPool,
2096 VkQueue queue,
2097 qpWatchDog* watchDog)
2098 {
2099 const deUint32 limit = 10000u;
2100 const deUint32 count = structCount();
2101 std::vector<BlasPtr> buildingOnDevice;
2102
2103 auto buildOnDevice = [&]() -> void
2104 {
2105 Move<VkCommandBuffer> cmd = allocateCommandBuffer(vk, device, cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2106
2107 beginCommandBuffer(vk, *cmd, 0u);
2108 for (const auto& str : buildingOnDevice)
2109 str->build(vk, device, *cmd);
2110 endCommandBuffer(vk, *cmd);
2111
2112 submitCommandsAndWait(vk, device, queue, *cmd);
2113 vk.resetCommandPool(device, cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
2114 };
2115
2116 buildingOnDevice.reserve(limit);
2117 for (deUint32 i = 0; i < count; ++i)
2118 {
2119 auto str = m_structs[i];
2120
2121 if (str->getBuildType() == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR)
2122 str->build(vk, device, DE_NULL);
2123 else
2124 buildingOnDevice.emplace_back(str);
2125
2126 if (buildingOnDevice.size() == limit || (count - 1) == i)
2127 {
2128 buildOnDevice();
2129 buildingOnDevice.clear();
2130 }
2131
2132 if ((i % WATCHDOG_INTERVAL) == 0 && watchDog)
2133 qpWatchDog_touch(watchDog);
2134 }
2135 }
2136
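// Computes the sizes needed by this member. When geometry data is present the sizes
// come from vkGetAccelerationStructureBuildSizesKHR (plus the vertex/index buffer
// sizes); when an explicit structure size is given instead (e.g. for deserialization)
// only the 256-byte-aligned structure size is reported.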
2137 auto BottomLevelAccelerationStructurePoolMember::computeBuildSize (const DeviceInterface& vk,
2138 const VkDevice device,
2139 const VkDeviceSize strSize) const
2140 // accStrSize, updateScratch, buildScratch, vertexSize, indexSize
2141 -> std::tuple<VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize>
2142 {
2143 DE_ASSERT(!m_geometriesData.empty() != !(strSize == 0)); // logical XOR: exactly one of geometry data and explicit size must be provided
2144
2145 std::tuple<VkDeviceSize,VkDeviceSize,VkDeviceSize,VkDeviceSize,VkDeviceSize> result(deAlign64(strSize, 256), 0, 0, 0, 0);
2146
2147 if (!m_geometriesData.empty())
2148 {
2149 std::vector<VkAccelerationStructureGeometryKHR> accelerationStructureGeometriesKHR;
2150 std::vector<VkAccelerationStructureGeometryKHR*> accelerationStructureGeometriesKHRPointers;
2151 std::vector<VkAccelerationStructureBuildRangeInfoKHR> accelerationStructureBuildRangeInfoKHR;
2152 std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT> accelerationStructureGeometryMicromapsEXT;
2153 std::vector<deUint32> maxPrimitiveCounts;
2154 prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers, accelerationStructureBuildRangeInfoKHR, accelerationStructureGeometryMicromapsEXT, maxPrimitiveCounts);
2155
2156 const VkAccelerationStructureGeometryKHR* accelerationStructureGeometriesKHRPointer = accelerationStructureGeometriesKHR.data();
2157 const VkAccelerationStructureGeometryKHR* const* accelerationStructureGeometry = accelerationStructureGeometriesKHRPointers.data();
2158
2159 VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR =
2160 {
2161 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // VkStructureType sType;
2162 DE_NULL, // const void* pNext;
2163 VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, // VkAccelerationStructureTypeKHR type;
2164 m_buildFlags, // VkBuildAccelerationStructureFlagsKHR flags;
2165 VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR, // VkBuildAccelerationStructureModeKHR mode;
2166 DE_NULL, // VkAccelerationStructureKHR srcAccelerationStructure;
2167 DE_NULL, // VkAccelerationStructureKHR dstAccelerationStructure;
2168 static_cast<deUint32>(accelerationStructureGeometriesKHR.size()), // deUint32 geometryCount;
2169 m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer, // const VkAccelerationStructureGeometryKHR* pGeometries;
2170 m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL, // const VkAccelerationStructureGeometryKHR* const* ppGeometries;
2171 makeDeviceOrHostAddressKHR(DE_NULL) // VkDeviceOrHostAddressKHR scratchData;
2172 };
2173
2174 VkAccelerationStructureBuildSizesInfoKHR sizeInfo =
2175 {
2176 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, // VkStructureType sType;
2177 DE_NULL, // const void* pNext;
2178 0, // VkDeviceSize accelerationStructureSize;
2179 0, // VkDeviceSize updateScratchSize;
2180 0 // VkDeviceSize buildScratchSize;
2181 };
2182
2183 vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);
2184
2185 std::get<0>(result) = sizeInfo.accelerationStructureSize;
2186 std::get<1>(result) = sizeInfo.updateScratchSize;
2187 std::get<2>(result) = sizeInfo.buildScratchSize;
2188 std::get<3>(result) = getVertexBufferSize(m_geometriesData);
2189 std::get<4>(result) = getIndexBufferSize(m_geometriesData);
2190 }
2191
2192 return result;
2193 }
2194
2195 void BottomLevelAccelerationStructurePoolMember::preCreateSetSizesAndOffsets (const Info& info,
2196 const VkDeviceSize accStrSize,
2197 const VkDeviceSize updateScratchSize,
2198 const VkDeviceSize buildScratchSize)
2199 {
2200 m_info = info;
2201 m_structureSize = accStrSize;
2202 m_updateScratchSize = updateScratchSize;
2203 m_buildScratchSize = buildScratchSize;
2204 }
2205
2206 void BottomLevelAccelerationStructurePoolMember::createAccellerationStructure (const DeviceInterface& vk,
2207 const VkDevice device,
2208 VkDeviceAddress deviceAddress)
2209 {
2210 const VkAccelerationStructureTypeKHR structureType = (m_createGeneric
2211 ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
2212 : VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
2213 const VkAccelerationStructureCreateInfoKHR accelerationStructureCreateInfoKHR
2214 {
2215 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR, // VkStructureType sType;
2216 DE_NULL, // const void* pNext;
2217 m_createFlags, // VkAccelerationStructureCreateFlagsKHR createFlags;
2218 getAccelerationStructureBuffer()->get(), // VkBuffer buffer;
2219 getAccelerationStructureBufferOffset(), // VkDeviceSize offset;
2220 m_structureSize, // VkDeviceSize size;
2221 structureType, // VkAccelerationStructureTypeKHR type;
2222 deviceAddress // VkDeviceAddress deviceAddress;
2223 };
2224
2225 m_accelerationStructureKHR = createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);
2226 }
2227
2228 TopLevelAccelerationStructure::~TopLevelAccelerationStructure ()
2229 {
2230 }
2231
2232 TopLevelAccelerationStructure::TopLevelAccelerationStructure ()
2233 : m_structureSize (0u)
2234 , m_updateScratchSize (0u)
2235 , m_buildScratchSize (0u)
2236 {
2237 }
2238
2239 void TopLevelAccelerationStructure::setInstanceCount (const size_t instanceCount)
2240 {
2241 m_bottomLevelInstances.reserve(instanceCount);
2242 m_instanceData.reserve(instanceCount);
2243 }
2244
2245 void TopLevelAccelerationStructure::addInstance (de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelStructure,
2246 const VkTransformMatrixKHR& matrix,
2247 deUint32 instanceCustomIndex,
2248 deUint32 mask,
2249 deUint32 instanceShaderBindingTableRecordOffset,
2250 VkGeometryInstanceFlagsKHR flags)
2251 {
2252 m_bottomLevelInstances.push_back(bottomLevelStructure);
2253 m_instanceData.push_back(InstanceData(matrix, instanceCustomIndex, mask, instanceShaderBindingTableRecordOffset, flags));
2254 }
2255
2256 VkAccelerationStructureBuildSizesInfoKHR TopLevelAccelerationStructure::getStructureBuildSizes () const
2257 {
2258 return
2259 {
2260 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, // VkStructureType sType;
2261 DE_NULL, // const void* pNext;
2262 m_structureSize, // VkDeviceSize accelerationStructureSize;
2263 m_updateScratchSize, // VkDeviceSize updateScratchSize;
2264 m_buildScratchSize // VkDeviceSize buildScratchSize;
2265 };
2266 }
2267
2268 void TopLevelAccelerationStructure::createAndBuild (const DeviceInterface& vk,
2269 const VkDevice device,
2270 const VkCommandBuffer cmdBuffer,
2271 Allocator& allocator,
2272 VkDeviceAddress deviceAddress)
2273 {
2274 create(vk, device, allocator, 0u, deviceAddress);
2275 build(vk, device, cmdBuffer);
2276 }
2277
2278 void TopLevelAccelerationStructure::createAndCopyFrom (const DeviceInterface& vk,
2279 const VkDevice device,
2280 const VkCommandBuffer cmdBuffer,
2281 Allocator& allocator,
2282 TopLevelAccelerationStructure* accelerationStructure,
2283 VkDeviceSize compactCopySize,
2284 VkDeviceAddress deviceAddress)
2285 {
2286 DE_ASSERT(accelerationStructure != NULL);
2287 VkDeviceSize copiedSize = compactCopySize > 0u ? compactCopySize : accelerationStructure->getStructureBuildSizes().accelerationStructureSize;
2288 DE_ASSERT(copiedSize != 0u);
2289
2290 create(vk, device, allocator, copiedSize, deviceAddress);
2291 copyFrom(vk, device, cmdBuffer, accelerationStructure, compactCopySize > 0u);
2292 }
2293
2294 void TopLevelAccelerationStructure::createAndDeserializeFrom (const DeviceInterface& vk,
2295 const VkDevice device,
2296 const VkCommandBuffer cmdBuffer,
2297 Allocator& allocator,
2298 SerialStorage* storage,
2299 VkDeviceAddress deviceAddress)
2300 {
2301 DE_ASSERT(storage != NULL);
2302 DE_ASSERT(storage->getStorageSize() >= SerialStorage::SERIAL_STORAGE_SIZE_MIN);
2303 create(vk, device, allocator, storage->getDeserializedSize(), deviceAddress);
2304 if (storage->hasDeepFormat()) createAndDeserializeBottoms(vk, device, cmdBuffer, allocator, storage);
2305 deserialize(vk, device, cmdBuffer, storage);
2306 }
2307
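// Allocates a host-visible buffer holding one VkAccelerationStructureInstanceKHR per
// bottom-level structure. Cached memory is tried first when requested, falling back
// to plain host-visible coherent memory if the cached allocation is not supported.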
2308 BufferWithMemory* createInstanceBuffer (const DeviceInterface& vk,
2309 const VkDevice device,
2310 Allocator& allocator,
2311 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> > bottomLevelInstances,
2312 std::vector<InstanceData> instanceData,
2313 const bool tryCachedMemory)
2314 {
2315 DE_ASSERT(bottomLevelInstances.size() != 0);
2316 DE_ASSERT(bottomLevelInstances.size() == instanceData.size());
2317 DE_UNREF(instanceData);
2318
2319 BufferWithMemory* result = nullptr;
2320 const VkDeviceSize bufferSizeBytes = bottomLevelInstances.size() * sizeof(VkAccelerationStructureInstanceKHR);
2321 const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
2322 if (tryCachedMemory) try
2323 {
2324 result = new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
2325 }
2326 catch (const tcu::NotSupportedError&)
2327 {
2328 result = nullptr;
2329 }
2330 return result
2331 ? result
2332 : new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
2333 }
2334
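// Writes a single packed VkAccelerationStructureInstanceKHR. The 64-bit structure
// reference is the BLAS device address for device builds, or the host handle value
// for host builds; a zero reference marks the instance as inactive.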
2335 void updateSingleInstance (const DeviceInterface& vk,
2336 const VkDevice device,
2337 const BottomLevelAccelerationStructure& bottomLevelAccelerationStructure,
2338 const InstanceData& instanceData,
2339 deUint8* bufferLocation,
2340 VkAccelerationStructureBuildTypeKHR buildType,
2341 bool inactiveInstances)
2342 {
2343 const VkAccelerationStructureKHR accelerationStructureKHR = *bottomLevelAccelerationStructure.getPtr();
2344
2345 // This part needs to be fixed once a new version of VkAccelerationStructureInstanceKHR is added to vkStructTypes.inl
2346 VkDeviceAddress accelerationStructureAddress;
2347 if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2348 {
2349 VkAccelerationStructureDeviceAddressInfoKHR asDeviceAddressInfo =
2350 {
2351 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR, // VkStructureType sType;
2352 DE_NULL, // const void* pNext;
2353 accelerationStructureKHR // VkAccelerationStructureKHR accelerationStructure;
2354 };
2355 accelerationStructureAddress = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
2356 }
2357
2358 deUint64 structureReference;
2359 if (inactiveInstances)
2360 {
2361 // Instances are marked inactive by setting their acceleration structure reference to VK_NULL_HANDLE (i.e. an address of zero).
2362 structureReference = 0ull;
2363 }
2364 else
2365 {
2366 structureReference = (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2367 ? deUint64(accelerationStructureAddress)
2368 : deUint64(accelerationStructureKHR.getInternal());
2369 }
2370
2371 VkAccelerationStructureInstanceKHR accelerationStructureInstanceKHR = makeVkAccelerationStructureInstanceKHR
2372 (
2373 instanceData.matrix, // VkTransformMatrixKHR transform;
2374 instanceData.instanceCustomIndex, // deUint32 instanceCustomIndex:24;
2375 instanceData.mask, // deUint32 mask:8;
2376 instanceData.instanceShaderBindingTableRecordOffset, // deUint32 instanceShaderBindingTableRecordOffset:24;
2377 instanceData.flags, // VkGeometryInstanceFlagsKHR flags:8;
2378 structureReference // deUint64 accelerationStructureReference;
2379 );
2380
2381 deMemcpy(bufferLocation, &accelerationStructureInstanceKHR, sizeof(VkAccelerationStructureInstanceKHR));
2382 }
2383
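// Fills the whole instance buffer: one instance record per bottom-level structure,
// written sequentially into the mapped allocation and flushed at the end.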
2384 void updateInstanceBuffer (const DeviceInterface& vk,
2385 const VkDevice device,
2386 const std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>& bottomLevelInstances,
2387 const std::vector<InstanceData>& instanceData,
2388 const BufferWithMemory* instanceBuffer,
2389 VkAccelerationStructureBuildTypeKHR buildType,
2390 bool inactiveInstances)
2391 {
2392 DE_ASSERT(bottomLevelInstances.size() != 0);
2393 DE_ASSERT(bottomLevelInstances.size() == instanceData.size());
2394
2395 auto& instancesAlloc = instanceBuffer->getAllocation();
2396 auto bufferStart = reinterpret_cast<deUint8*>(instancesAlloc.getHostPtr());
2397 VkDeviceSize bufferOffset = 0ull;
2398
2399 for (size_t instanceNdx = 0; instanceNdx < bottomLevelInstances.size(); ++instanceNdx)
2400 {
2401 const auto& blas = *bottomLevelInstances[instanceNdx];
2402 updateSingleInstance(vk, device, blas, instanceData[instanceNdx], bufferStart + bufferOffset, buildType, inactiveInstances);
2403 bufferOffset += sizeof(VkAccelerationStructureInstanceKHR);
2404 }
2405
2406 flushMappedMemoryRange(vk, device, instancesAlloc.getMemory(), instancesAlloc.getOffset(), VK_WHOLE_SIZE);
2407 }
2408
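// KHR implementation of the top-level acceleration structure helper. Illustrative usage
// sketch (hedged; assumes "blas" is a built de::SharedPtr<BottomLevelAccelerationStructure>
// and "cmdBuffer" is a command buffer being recorded by the calling test):
//
//   de::MovePtr<TopLevelAccelerationStructure> tlas = makeTopLevelAccelerationStructure();
//   tlas->setInstanceCount(1);
//   tlas->addInstance(blas);                  // identity transform by default
//   tlas->create(vk, device, allocator, 0u);  // structureSize 0: size queried from instances
//   tlas->build(vk, device, cmdBuffer);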
2409 class TopLevelAccelerationStructureKHR : public TopLevelAccelerationStructure
2410 {
2411 public:
2412 static deUint32 getRequiredAllocationCount (void);
2413
2414 TopLevelAccelerationStructureKHR ();
2415 TopLevelAccelerationStructureKHR (const TopLevelAccelerationStructureKHR& other) = delete;
2416 virtual ~TopLevelAccelerationStructureKHR ();
2417
2418 void setBuildType (const VkAccelerationStructureBuildTypeKHR buildType) override;
2419 void setCreateFlags (const VkAccelerationStructureCreateFlagsKHR createFlags) override;
2420 void setCreateGeneric (bool createGeneric) override;
2421 void setBuildFlags (const VkBuildAccelerationStructureFlagsKHR buildFlags) override;
2422 void setBuildWithoutPrimitives (bool buildWithoutPrimitives) override;
2423 void setInactiveInstances (bool inactiveInstances) override;
2424 void setDeferredOperation (const bool deferredOperation,
2425 const deUint32 workerThreadCount) override;
2426 void setUseArrayOfPointers (const bool useArrayOfPointers) override;
2427 void setIndirectBuildParameters (const VkBuffer indirectBuffer,
2428 const VkDeviceSize indirectBufferOffset,
2429 const deUint32 indirectBufferStride) override;
2430 void setUsePPGeometries (const bool usePPGeometries) override;
2431 void setTryCachedMemory (const bool tryCachedMemory) override;
2432 VkBuildAccelerationStructureFlagsKHR getBuildFlags () const override;
2433
2434 void getCreationSizes (const DeviceInterface& vk,
2435 const VkDevice device,
2436 const VkDeviceSize structureSize,
2437 CreationSizes& sizes) override;
2438 void create (const DeviceInterface& vk,
2439 const VkDevice device,
2440 Allocator& allocator,
2441 VkDeviceSize structureSize,
2442 VkDeviceAddress deviceAddress = 0u,
2443 const void* pNext = DE_NULL,
2444 const MemoryRequirement& addMemoryRequirement = MemoryRequirement::Any) override;
2445 void build (const DeviceInterface& vk,
2446 const VkDevice device,
2447 const VkCommandBuffer cmdBuffer) override;
2448 void copyFrom (const DeviceInterface& vk,
2449 const VkDevice device,
2450 const VkCommandBuffer cmdBuffer,
2451 TopLevelAccelerationStructure* accelerationStructure,
2452 bool compactCopy) override;
2453 void serialize (const DeviceInterface& vk,
2454 const VkDevice device,
2455 const VkCommandBuffer cmdBuffer,
2456 SerialStorage* storage) override;
2457 void deserialize (const DeviceInterface& vk,
2458 const VkDevice device,
2459 const VkCommandBuffer cmdBuffer,
2460 SerialStorage* storage) override;
2461
2462 std::vector<VkDeviceSize> getSerializingSizes (const DeviceInterface& vk,
2463 const VkDevice device,
2464 const VkQueue queue,
2465 const deUint32 queueFamilyIndex) override;
2466
2467 std::vector<deUint64> getSerializingAddresses (const DeviceInterface& vk,
2468 const VkDevice device) const override;
2469
2470
2471 const VkAccelerationStructureKHR* getPtr (void) const override;
2472
2473 void updateInstanceMatrix (const DeviceInterface& vk,
2474 const VkDevice device,
2475 size_t instanceIndex,
2476 const VkTransformMatrixKHR& matrix) override;
2477
2478 protected:
2479 VkAccelerationStructureBuildTypeKHR m_buildType;
2480 VkAccelerationStructureCreateFlagsKHR m_createFlags;
2481 bool m_createGeneric;
2482 VkBuildAccelerationStructureFlagsKHR m_buildFlags;
2483 bool m_buildWithoutPrimitives;
2484 bool m_inactiveInstances;
2485 bool m_deferredOperation;
2486 deUint32 m_workerThreadCount;
2487 bool m_useArrayOfPointers;
2488 de::MovePtr<BufferWithMemory> m_accelerationStructureBuffer;
2489 de::MovePtr<BufferWithMemory> m_instanceBuffer;
2490 de::MovePtr<BufferWithMemory> m_instanceAddressBuffer;
2491 de::MovePtr<BufferWithMemory> m_deviceScratchBuffer;
2492 std::vector<deUint8> m_hostScratchBuffer;
2493 Move<VkAccelerationStructureKHR> m_accelerationStructureKHR;
2494 VkBuffer m_indirectBuffer;
2495 VkDeviceSize m_indirectBufferOffset;
2496 deUint32 m_indirectBufferStride;
2497 bool m_usePPGeometries;
2498 bool m_tryCachedMemory;
2499
2500
2501 void prepareInstances (const DeviceInterface& vk,
2502 const VkDevice device,
2503 VkAccelerationStructureGeometryKHR& accelerationStructureGeometryKHR,
2504 std::vector<deUint32>& maxPrimitiveCounts);
2505
2506 void serializeBottoms (const DeviceInterface& vk,
2507 const VkDevice device,
2508 const VkCommandBuffer cmdBuffer,
2509 SerialStorage* storage,
2510 VkDeferredOperationKHR deferredOperation);
2511
2512 void createAndDeserializeBottoms (const DeviceInterface& vk,
2513 const VkDevice device,
2514 const VkCommandBuffer cmdBuffer,
2515 Allocator& allocator,
2516 SerialStorage* storage) override;
2517 };
2518
2519 deUint32 TopLevelAccelerationStructureKHR::getRequiredAllocationCount (void)
2520 {
2521 /*
2522 de::MovePtr<BufferWithMemory> m_instanceBuffer;
2523 de::MovePtr<Allocation> m_accelerationStructureAlloc;
2524 de::MovePtr<BufferWithMemory> m_deviceScratchBuffer;
2525 */
2526 return 3u;
2527 }
2528
2529 TopLevelAccelerationStructureKHR::TopLevelAccelerationStructureKHR ()
2530 : TopLevelAccelerationStructure ()
2531 , m_buildType (VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2532 , m_createFlags (0u)
2533 , m_createGeneric (false)
2534 , m_buildFlags (0u)
2535 , m_buildWithoutPrimitives (false)
2536 , m_inactiveInstances (false)
2537 , m_deferredOperation (false)
2538 , m_workerThreadCount (0)
2539 , m_useArrayOfPointers (false)
2540 , m_accelerationStructureBuffer (DE_NULL)
2541 , m_instanceBuffer (DE_NULL)
2542 , m_instanceAddressBuffer (DE_NULL)
2543 , m_deviceScratchBuffer (DE_NULL)
2544 , m_accelerationStructureKHR ()
2545 , m_indirectBuffer (DE_NULL)
2546 , m_indirectBufferOffset (0)
2547 , m_indirectBufferStride (0)
2548 , m_usePPGeometries (false)
2549 , m_tryCachedMemory (true)
2550 {
2551 }
2552
2553 TopLevelAccelerationStructureKHR::~TopLevelAccelerationStructureKHR ()
2554 {
2555 }
2556
2557 void TopLevelAccelerationStructureKHR::setBuildType (const VkAccelerationStructureBuildTypeKHR buildType)
2558 {
2559 m_buildType = buildType;
2560 }
2561
2562 void TopLevelAccelerationStructureKHR::setCreateFlags (const VkAccelerationStructureCreateFlagsKHR createFlags)
2563 {
2564 m_createFlags = createFlags;
2565 }
2566
2567 void TopLevelAccelerationStructureKHR::setCreateGeneric (bool createGeneric)
2568 {
2569 m_createGeneric = createGeneric;
2570 }
2571
2572 void TopLevelAccelerationStructureKHR::setInactiveInstances (bool inactiveInstances)
2573 {
2574 m_inactiveInstances = inactiveInstances;
2575 }
2576
2577 void TopLevelAccelerationStructureKHR::setBuildFlags (const VkBuildAccelerationStructureFlagsKHR buildFlags)
2578 {
2579 m_buildFlags = buildFlags;
2580 }
2581
2582 void TopLevelAccelerationStructureKHR::setBuildWithoutPrimitives (bool buildWithoutPrimitives)
2583 {
2584 m_buildWithoutPrimitives = buildWithoutPrimitives;
2585 }
2586
2587 void TopLevelAccelerationStructureKHR::setDeferredOperation (const bool deferredOperation,
2588 const deUint32 workerThreadCount)
2589 {
2590 m_deferredOperation = deferredOperation;
2591 m_workerThreadCount = workerThreadCount;
2592 }
2593
2594 void TopLevelAccelerationStructureKHR::setUseArrayOfPointers (const bool useArrayOfPointers)
2595 {
2596 m_useArrayOfPointers = useArrayOfPointers;
2597 }
2598
2599 void TopLevelAccelerationStructureKHR::setUsePPGeometries (const bool usePPGeometries)
2600 {
2601 m_usePPGeometries = usePPGeometries;
2602 }
2603
2604 void TopLevelAccelerationStructureKHR::setTryCachedMemory (const bool tryCachedMemory)
2605 {
2606 m_tryCachedMemory = tryCachedMemory;
2607 }
2608
2609 void TopLevelAccelerationStructureKHR::setIndirectBuildParameters (const VkBuffer indirectBuffer,
2610 const VkDeviceSize indirectBufferOffset,
2611 const deUint32 indirectBufferStride)
2612 {
2613 m_indirectBuffer = indirectBuffer;
2614 m_indirectBufferOffset = indirectBufferOffset;
2615 m_indirectBufferStride = indirectBufferStride;
2616 }
2617
2618 VkBuildAccelerationStructureFlagsKHR TopLevelAccelerationStructureKHR::getBuildFlags () const
2619 {
2620 return m_buildFlags;
2621 }
2622
2623 VkDeviceSize TopLevelAccelerationStructure::CreationSizes::sum () const
2624 {
2625 return structure + updateScratch + buildScratch + instancePointers + instancesBuffer;
2626 }
2627
2628 void TopLevelAccelerationStructureKHR::getCreationSizes (const DeviceInterface& vk,
2629 const VkDevice device,
2630 const VkDeviceSize structureSize,
2631 CreationSizes& sizes)
2632 {
2633 // AS may be built from geometries using vkCmdBuildAccelerationStructuresKHR / vkBuildAccelerationStructuresKHR,
2634 // or may be copied/compacted/deserialized from another AS (in that case the AS needs no geometries, but it must know its size before creation).
2635 DE_ASSERT(!m_bottomLevelInstances.empty() != !(structureSize == 0)); // logical xor
2636
2637 if (structureSize == 0)
2638 {
2639 VkAccelerationStructureGeometryKHR accelerationStructureGeometryKHR;
2640 const auto accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
2641 std::vector<deUint32> maxPrimitiveCounts;
2642 prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);
2643
2644 VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR =
2645 {
2646 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // VkStructureType sType;
2647 DE_NULL, // const void* pNext;
2648 VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR, // VkAccelerationStructureTypeKHR type;
2649 m_buildFlags, // VkBuildAccelerationStructureFlagsKHR flags;
2650 VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR, // VkBuildAccelerationStructureModeKHR mode;
2651 DE_NULL, // VkAccelerationStructureKHR srcAccelerationStructure;
2652 DE_NULL, // VkAccelerationStructureKHR dstAccelerationStructure;
2653 1u, // deUint32 geometryCount;
2654 (m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR), // const VkAccelerationStructureGeometryKHR* pGeometries;
2655 (m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr), // const VkAccelerationStructureGeometryKHR* const* ppGeometries;
2656 makeDeviceOrHostAddressKHR(DE_NULL) // VkDeviceOrHostAddressKHR scratchData;
2657 };
2658
2659 VkAccelerationStructureBuildSizesInfoKHR sizeInfo =
2660 {
2661 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, // VkStructureType sType;
2662 DE_NULL, // const void* pNext;
2663 0, // VkDeviceSize accelerationStructureSize;
2664 0, // VkDeviceSize updateScratchSize;
2665 0 // VkDeviceSize buildScratchSize;
2666 };
2667
2668 vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);
2669
2670 sizes.structure = sizeInfo.accelerationStructureSize;
2671 sizes.updateScratch = sizeInfo.updateScratchSize;
2672 sizes.buildScratch = sizeInfo.buildScratchSize;
2673 }
2674 else
2675 {
2676 sizes.structure = structureSize;
2677 sizes.updateScratch = 0u;
2678 sizes.buildScratch = 0u;
2679 }
2680
2681 sizes.instancePointers = 0u;
2682 if (m_useArrayOfPointers)
2683 {
2684 const size_t pointerSize = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) ? sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress) : sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
2685 sizes.instancePointers = static_cast<VkDeviceSize>(m_bottomLevelInstances.size() * pointerSize);
2686 }
2687
2688 sizes.instancesBuffer = m_bottomLevelInstances.empty() ? 0u : m_bottomLevelInstances.size() * sizeof(VkAccelerationStructureInstanceKHR);
2689 }
2690
2691 void TopLevelAccelerationStructureKHR::create (const DeviceInterface& vk,
2692 const VkDevice device,
2693 Allocator& allocator,
2694 VkDeviceSize structureSize,
2695 VkDeviceAddress deviceAddress,
2696 const void* pNext,
2697 const MemoryRequirement& addMemoryRequirement)
2698 {
2699 // AS may be built from geometries using vkCmdBuildAccelerationStructuresKHR / vkBuildAccelerationStructuresKHR,
2700 // or may be copied/compacted/deserialized from another AS (in that case the AS needs no geometries, but it must know its size before creation).
2701 DE_ASSERT(!m_bottomLevelInstances.empty() != !(structureSize == 0)); // logical xor
2702
2703 if (structureSize == 0)
2704 {
2705 VkAccelerationStructureGeometryKHR accelerationStructureGeometryKHR;
2706 const auto accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
2707 std::vector<deUint32> maxPrimitiveCounts;
2708 prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);
2709
2710 VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR =
2711 {
2712 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // VkStructureType sType;
2713 DE_NULL, // const void* pNext;
2714 VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR, // VkAccelerationStructureTypeKHR type;
2715 m_buildFlags, // VkBuildAccelerationStructureFlagsKHR flags;
2716 VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR, // VkBuildAccelerationStructureModeKHR mode;
2717 DE_NULL, // VkAccelerationStructureKHR srcAccelerationStructure;
2718 DE_NULL, // VkAccelerationStructureKHR dstAccelerationStructure;
2719 1u, // deUint32 geometryCount;
2720 (m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR), // const VkAccelerationStructureGeometryKHR* pGeometries;
2721 (m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr), // const VkAccelerationStructureGeometryKHR* const* ppGeometries;
2722 makeDeviceOrHostAddressKHR(DE_NULL) // VkDeviceOrHostAddressKHR scratchData;
2723 };
2724
2725 VkAccelerationStructureBuildSizesInfoKHR sizeInfo =
2726 {
2727 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, // VkStructureType sType;
2728 DE_NULL, // const void* pNext;
2729 0, // VkDeviceSize accelerationStructureSize;
2730 0, // VkDeviceSize updateScratchSize;
2731 0 // VkDeviceSize buildScratchSize;
2732 };
2733
2734 vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);
2735
2736 m_structureSize = sizeInfo.accelerationStructureSize;
2737 m_updateScratchSize = sizeInfo.updateScratchSize;
2738 m_buildScratchSize = sizeInfo.buildScratchSize;
2739 }
2740 else
2741 {
2742 m_structureSize = structureSize;
2743 m_updateScratchSize = 0u;
2744 m_buildScratchSize = 0u;
2745 }
2746
2747 {
2748 const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(m_structureSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
2749 const MemoryRequirement memoryRequirement = addMemoryRequirement | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress;
2750
2751 try
2752 {
2753 m_accelerationStructureBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | memoryRequirement));
2754 }
2755 catch (const tcu::NotSupportedError&)
2756 {
2757 // retry without Cached flag
2758 m_accelerationStructureBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, memoryRequirement));
2759 }
2760 }
2761
2762 {
2763 const VkAccelerationStructureTypeKHR structureType = (m_createGeneric
2764 ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
2765 : VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR);
2766 const VkAccelerationStructureCreateInfoKHR accelerationStructureCreateInfoKHR =
2767 {
2768 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR, // VkStructureType sType;
2769 pNext, // const void* pNext;
2770 m_createFlags, // VkAccelerationStructureCreateFlagsKHR createFlags;
2771 m_accelerationStructureBuffer->get(), // VkBuffer buffer;
2772 0u, // VkDeviceSize offset;
2773 m_structureSize, // VkDeviceSize size;
2774 structureType, // VkAccelerationStructureTypeKHR type;
2775 deviceAddress // VkDeviceAddress deviceAddress;
2776 };
2777
2778 m_accelerationStructureKHR = createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);
2779 }
2780
2781 if (m_buildScratchSize > 0u)
2782 {
2783 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2784 {
2785 const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(m_buildScratchSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
2786 m_deviceScratchBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
2787 }
2788 else
2789 {
2790 m_hostScratchBuffer.resize(static_cast<size_t>(m_buildScratchSize));
2791 }
2792 }
2793
2794 if (m_useArrayOfPointers)
2795 {
2796 const size_t pointerSize = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) ? sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress) : sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
2797 const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(static_cast<VkDeviceSize>(m_bottomLevelInstances.size() * pointerSize), VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
2798 m_instanceAddressBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
2799 }
2800
2801 if (!m_bottomLevelInstances.empty())
2802 m_instanceBuffer = de::MovePtr<BufferWithMemory>(createInstanceBuffer(vk, device, allocator, m_bottomLevelInstances, m_instanceData, m_tryCachedMemory));
2803 }
2804
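// Patches the transform of a single instance in place (device builds only) and reflushes
// its entry in the instance buffer. A hedged sketch, assuming the identityMatrix3x4 helper
// from this framework and a TLAS built as in the sketch above:
//
//   VkTransformMatrixKHR m = identityMatrix3x4;
//   m.matrix[0][3] = 1.0f; // translate instance 0 by +1 on X
//   tlas->updateInstanceMatrix(vk, device, 0, m);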
2805 void TopLevelAccelerationStructureKHR::updateInstanceMatrix (const DeviceInterface& vk, const VkDevice device, size_t instanceIndex, const VkTransformMatrixKHR& matrix)
2806 {
2807 DE_ASSERT(m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR);
2808 DE_ASSERT(instanceIndex < m_bottomLevelInstances.size());
2809 DE_ASSERT(instanceIndex < m_instanceData.size());
2810
2811 const auto& blas = *m_bottomLevelInstances[instanceIndex];
2812 auto& instanceData = m_instanceData[instanceIndex];
2813 auto& instancesAlloc = m_instanceBuffer->getAllocation();
2814 auto bufferStart = reinterpret_cast<deUint8*>(instancesAlloc.getHostPtr());
2815 VkDeviceSize bufferOffset = sizeof(VkAccelerationStructureInstanceKHR) * instanceIndex;
2816
2817 instanceData.matrix = matrix;
2818 updateSingleInstance(vk, device, blas, instanceData, bufferStart + bufferOffset, m_buildType, m_inactiveInstances);
2819 flushMappedMemoryRange(vk, device, instancesAlloc.getMemory(), instancesAlloc.getOffset(), VK_WHOLE_SIZE);
2820 }
2821
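// Builds the TLAS through one of three paths: a device build recorded into cmdBuffer
// (direct, or indirect when m_indirectBuffer is set), an immediate host build, or a
// deferred host build completed via finishDeferredOperation(). Device builds are
// followed by a barrier covering acceleration structure reads and writes.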
2822 void TopLevelAccelerationStructureKHR::build (const DeviceInterface& vk,
2823 const VkDevice device,
2824 const VkCommandBuffer cmdBuffer)
2825 {
2826 DE_ASSERT(!m_bottomLevelInstances.empty());
2827 DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
2828 DE_ASSERT(m_buildScratchSize != 0);
2829
2830 updateInstanceBuffer(vk, device, m_bottomLevelInstances, m_instanceData, m_instanceBuffer.get(), m_buildType, m_inactiveInstances);
2831
2832 VkAccelerationStructureGeometryKHR accelerationStructureGeometryKHR;
2833 const auto accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
2834 std::vector<deUint32> maxPrimitiveCounts;
2835 prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);
2836
2837 VkDeviceOrHostAddressKHR scratchData = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2838 ? makeDeviceOrHostAddressKHR(vk, device, m_deviceScratchBuffer->get(), 0)
2839 : makeDeviceOrHostAddressKHR(m_hostScratchBuffer.data());
2840
2841 VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR =
2842 {
2843 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // VkStructureType sType;
2844 DE_NULL, // const void* pNext;
2845 VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR, // VkAccelerationStructureTypeKHR type;
2846 m_buildFlags, // VkBuildAccelerationStructureFlagsKHR flags;
2847 VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR, // VkBuildAccelerationStructureModeKHR mode;
2848 DE_NULL, // VkAccelerationStructureKHR srcAccelerationStructure;
2849 m_accelerationStructureKHR.get(), // VkAccelerationStructureKHR dstAccelerationStructure;
2850 1u, // deUint32 geometryCount;
2851 (m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR), // const VkAccelerationStructureGeometryKHR* pGeometries;
2852 (m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr), // const VkAccelerationStructureGeometryKHR* const* ppGeometries;
2853 scratchData // VkDeviceOrHostAddressKHR scratchData;
2854 };
2855
2856 const deUint32 primitiveCount = (m_buildWithoutPrimitives ? 0u : static_cast<deUint32>(m_bottomLevelInstances.size()));
2857
2858 VkAccelerationStructureBuildRangeInfoKHR accelerationStructureBuildRangeInfoKHR =
2859 {
2860 primitiveCount, // deUint32 primitiveCount;
2861 0, // deUint32 primitiveOffset;
2862 0, // deUint32 firstVertex;
2863 0 // deUint32 transformOffset;
2864 };
2865 VkAccelerationStructureBuildRangeInfoKHR* accelerationStructureBuildRangeInfoKHRPtr = &accelerationStructureBuildRangeInfoKHR;
2866
2867 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2868 {
2869 if (m_indirectBuffer == DE_NULL)
2870 vk.cmdBuildAccelerationStructuresKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
2871 else
2872 {
2873 VkDeviceAddress indirectDeviceAddress = getBufferDeviceAddress(vk, device, m_indirectBuffer, m_indirectBufferOffset);
2874 deUint32* pMaxPrimitiveCounts = maxPrimitiveCounts.data();
2875 vk.cmdBuildAccelerationStructuresIndirectKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, &indirectDeviceAddress, &m_indirectBufferStride, &pMaxPrimitiveCounts);
2876 }
2877 }
2878 else if (!m_deferredOperation)
2879 {
2880 VK_CHECK(vk.buildAccelerationStructuresKHR(device, DE_NULL, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr));
2881 }
2882 else
2883 {
2884 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
2885 const auto deferredOperation = deferredOperationPtr.get();
2886
2887 VkResult result = vk.buildAccelerationStructuresKHR(device, deferredOperation, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
2888
2889 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
2890
2891 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
2892
2893 accelerationStructureBuildGeometryInfoKHR.pNext = DE_NULL;
2894 }
2895
2896 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2897 {
2898 const VkAccessFlags accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
2899 const VkMemoryBarrier memBarrier = makeMemoryBarrier(accessMasks, accessMasks);
2900
2901 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
2902 }
2903 }
2904
2905 void TopLevelAccelerationStructureKHR::copyFrom (const DeviceInterface& vk,
2906 const VkDevice device,
2907 const VkCommandBuffer cmdBuffer,
2908 TopLevelAccelerationStructure* accelerationStructure,
2909 bool compactCopy)
2910 {
2911 DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
2912 DE_ASSERT(accelerationStructure != DE_NULL);
2913
2914 VkCopyAccelerationStructureInfoKHR copyAccelerationStructureInfo =
2915 {
2916 VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR, // VkStructureType sType;
2917 DE_NULL, // const void* pNext;
2918 *(accelerationStructure->getPtr()), // VkAccelerationStructureKHR src;
2919 *(getPtr()), // VkAccelerationStructureKHR dst;
2920 compactCopy ? VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR : VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR // VkCopyAccelerationStructureModeKHR mode;
2921 };
2922
2923 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2924 {
2925 vk.cmdCopyAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
2926 }
2927 else if (!m_deferredOperation)
2928 {
2929 VK_CHECK(vk.copyAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
2930 }
2931 else
2932 {
2933 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
2934 const auto deferredOperation = deferredOperationPtr.get();
2935
2936 VkResult result = vk.copyAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);
2937
2938 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
2939
2940 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
2941 }
2942
2943 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2944 {
2945 const VkAccessFlags accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
2946 const VkMemoryBarrier memBarrier = makeMemoryBarrier(accessMasks, accessMasks);
2947
2948 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
2949 }
2950
2951 }
2952
2953 void TopLevelAccelerationStructureKHR::serialize (const DeviceInterface& vk,
2954 const VkDevice device,
2955 const VkCommandBuffer cmdBuffer,
2956 SerialStorage* storage)
2957 {
2958 DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
2959 DE_ASSERT(storage != DE_NULL);
2960
2961 const VkCopyAccelerationStructureToMemoryInfoKHR copyAccelerationStructureInfo =
2962 {
2963 VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_INFO_KHR, // VkStructureType sType;
2964 DE_NULL, // const void* pNext;
2965 *(getPtr()), // VkAccelerationStructureKHR src;
2966 storage->getAddress(vk, device, m_buildType), // VkDeviceOrHostAddressKHR dst;
2967 VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR // VkCopyAccelerationStructureModeKHR mode;
2968 };
2969
2970 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2971 {
2972 vk.cmdCopyAccelerationStructureToMemoryKHR(cmdBuffer, &copyAccelerationStructureInfo);
2973 if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, DE_NULL);
2974 }
2975 else if (!m_deferredOperation)
2976 {
2977 VK_CHECK(vk.copyAccelerationStructureToMemoryKHR(device, DE_NULL, &copyAccelerationStructureInfo));
2978 if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, DE_NULL);
2979 }
2980 else
2981 {
2982 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
2983 const auto deferredOperation = deferredOperationPtr.get();
2984
2985 const VkResult result = vk.copyAccelerationStructureToMemoryKHR(device, deferredOperation, &copyAccelerationStructureInfo);
2986
2987 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
2988 if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, deferredOperation);
2989
2990 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
2991 }
2992 }
2993
2994 void TopLevelAccelerationStructureKHR::deserialize (const DeviceInterface& vk,
2995 const VkDevice device,
2996 const VkCommandBuffer cmdBuffer,
2997 SerialStorage* storage)
2998 {
2999 DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
3000 DE_ASSERT(storage != DE_NULL);
3001
3002 const VkCopyMemoryToAccelerationStructureInfoKHR copyAccelerationStructureInfo =
3003 {
3004 VK_STRUCTURE_TYPE_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_INFO_KHR, // VkStructureType sType;
3005 DE_NULL, // const void* pNext;
3006 storage->getAddressConst(vk, device, m_buildType), // VkDeviceOrHostAddressConstKHR src;
3007 *(getPtr()), // VkAccelerationStructureKHR dst;
3008 VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR // VkCopyAccelerationStructureModeKHR mode;
3009 };
3010
3011 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3012 {
3013 vk.cmdCopyMemoryToAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
3014 }
3015 else if (!m_deferredOperation)
3016 {
3017 VK_CHECK(vk.copyMemoryToAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
3018 }
3019 else
3020 {
3021 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
3022 const auto deferredOperation = deferredOperationPtr.get();
3023
3024 const VkResult result = vk.copyMemoryToAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);
3025
3026 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
3027
3028 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
3029 }
3030
3031 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3032 {
3033 const VkAccessFlags accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
3034 const VkMemoryBarrier memBarrier = makeMemoryBarrier(accessMasks, accessMasks);
3035
3036 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
3037 }
3038 }
3039
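// Serializes each distinct bottom-level structure exactly once: addresses[0] refers to
// the top-level structure itself, and BLASes shared by several instances are matched
// by address so repeated references reuse an already-written bottom storage slot.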
3040 void TopLevelAccelerationStructureKHR::serializeBottoms (const DeviceInterface& vk,
3041 const VkDevice device,
3042 const VkCommandBuffer cmdBuffer,
3043 SerialStorage* storage,
3044 VkDeferredOperationKHR deferredOperation)
3045 {
3046 DE_UNREF(deferredOperation);
3047 DE_ASSERT(storage->hasDeepFormat());
3048
3049 const std::vector<deUint64>& addresses = storage->getSerialInfo().addresses();
3050 const std::size_t cbottoms = m_bottomLevelInstances.size();
3051
3052 deUint32 storageIndex = 0;
3053 std::vector<deUint64> matches;
3054
3055 for (std::size_t i = 0; i < cbottoms; ++i)
3056 {
3057 const deUint64& lookAddr = addresses[i+1];
3058 auto end = matches.end();
3059 auto match = std::find_if(matches.begin(), end, [&](const deUint64& item){ return item == lookAddr; });
3060 if (match == end)
3061 {
3062 matches.emplace_back(lookAddr);
3063 m_bottomLevelInstances[i].get()->serialize(vk, device, cmdBuffer, storage->getBottomStorage(storageIndex).get());
3064 storageIndex += 1;
3065 }
3066 }
3067 }
3068
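// Inverse of serializeBottoms(): recreates the BLASes from deep storage (reusing one
// object for addresses that occurred more than once), then patches the serialized
// top-level header with the freshly obtained bottom-level addresses before the TLAS
// itself is deserialized.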
3069 void TopLevelAccelerationStructureKHR::createAndDeserializeBottoms (const DeviceInterface& vk,
3070 const VkDevice device,
3071 const VkCommandBuffer cmdBuffer,
3072 Allocator& allocator,
3073 SerialStorage* storage)
3074 {
3075 DE_ASSERT(storage->hasDeepFormat());
3076 DE_ASSERT(m_bottomLevelInstances.size() == 0);
3077
3078 const std::vector<deUint64>& addresses = storage->getSerialInfo().addresses();
3079 const std::size_t cbottoms = addresses.size() - 1;
3080 deUint32 storageIndex = 0;
3081 std::vector<std::pair<deUint64, std::size_t>> matches;
3082
3083 for (std::size_t i = 0; i < cbottoms; ++i)
3084 {
3085 const deUint64& lookAddr = addresses[i+1];
3086 auto end = matches.end();
3087 auto match = std::find_if(matches.begin(), end, [&](const std::pair<deUint64, std::size_t>& item){ return item.first == lookAddr; });
3088 if (match != end)
3089 {
3090 m_bottomLevelInstances.emplace_back(m_bottomLevelInstances[match->second]);
3091 }
3092 else
3093 {
3094 de::MovePtr<BottomLevelAccelerationStructure> blas = makeBottomLevelAccelerationStructure();
3095 blas->createAndDeserializeFrom(vk, device, cmdBuffer, allocator, storage->getBottomStorage(storageIndex).get());
3096 m_bottomLevelInstances.emplace_back(de::SharedPtr<BottomLevelAccelerationStructure>(blas.release()));
3097 matches.emplace_back(lookAddr, i);
3098 storageIndex += 1;
3099 }
3100 }
3101
3102 std::vector<deUint64> newAddresses = getSerializingAddresses(vk, device);
3103 DE_ASSERT(addresses.size() == newAddresses.size());
3104
3105 SerialStorage::AccelerationStructureHeader* header = storage->getASHeader();
3106 DE_ASSERT(cbottoms == header->handleCount);
3107
3108 // finally update bottom-level AS addresses before top-level AS deserialization
3109 for (std::size_t i = 0; i < cbottoms; ++i)
3110 {
3111 header->handleArray[i] = newAddresses[i+1];
3112 }
3113 }
3114
3115 std::vector<VkDeviceSize> TopLevelAccelerationStructureKHR::getSerializingSizes (const DeviceInterface& vk,
3116 const VkDevice device,
3117 const VkQueue queue,
3118 const deUint32 queueFamilyIndex)
3119 {
3120 const deUint32 queryCount(deUint32(m_bottomLevelInstances.size()) + 1);
3121 std::vector<VkAccelerationStructureKHR> handles(queryCount);
3122 std::vector<VkDeviceSize> sizes(queryCount);
3123
3124 handles[0] = m_accelerationStructureKHR.get();
3125
3126 for (deUint32 h = 1; h < queryCount; ++h)
3127 handles[h] = *m_bottomLevelInstances[h-1].get()->getPtr();
3128
3129 if (VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR == m_buildType)
3130 queryAccelerationStructureSize(vk, device, DE_NULL, handles, m_buildType, DE_NULL, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, sizes);
3131 else
3132 {
3133 const Move<VkCommandPool> cmdPool = createCommandPool(vk, device, 0, queueFamilyIndex);
3134 const Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3135 const Move<VkQueryPool> queryPool = makeQueryPool(vk, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, queryCount);
3136
3137 beginCommandBuffer(vk, *cmdBuffer);
3138 queryAccelerationStructureSize(vk, device, *cmdBuffer, handles, m_buildType, *queryPool, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, sizes);
3139 endCommandBuffer(vk, *cmdBuffer);
3140 submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
3141
3142 VK_CHECK(vk.getQueryPoolResults(device, *queryPool, 0u, queryCount, queryCount * sizeof(VkDeviceSize), sizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
3143 }
3144
3145 return sizes;
3146 }
3147
3148 std::vector<deUint64> TopLevelAccelerationStructureKHR::getSerializingAddresses (const DeviceInterface& vk, const VkDevice device) const
3149 {
3150 std::vector<deUint64> result(m_bottomLevelInstances.size() + 1);
3151
3152 VkAccelerationStructureDeviceAddressInfoKHR asDeviceAddressInfo =
3153 {
3154 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR, // VkStructureType sType;
3155 DE_NULL, // const void* pNext;
3156 DE_NULL // VkAccelerationStructureKHR accelerationStructure;
3157 };
3158
3159 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3160 {
3161 asDeviceAddressInfo.accelerationStructure = m_accelerationStructureKHR.get();
3162 result[0] = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
3163 }
3164 else
3165 {
3166 result[0] = deUint64(getPtr()->getInternal());
3167 }
3168
3169 for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
3170 {
3171 const BottomLevelAccelerationStructure& bottomLevelAccelerationStructure = *m_bottomLevelInstances[instanceNdx];
3172 const VkAccelerationStructureKHR accelerationStructureKHR = *bottomLevelAccelerationStructure.getPtr();
3173
3174 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3175 {
3176 asDeviceAddressInfo.accelerationStructure = accelerationStructureKHR;
3177 result[instanceNdx+1] = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
3178 }
3179 else
3180 {
3181 result[instanceNdx+1] = deUint64(accelerationStructureKHR.getInternal());
3182 }
3183 }
3184
3185 return result;
3186 }
3187
3188 const VkAccelerationStructureKHR* TopLevelAccelerationStructureKHR::getPtr (void) const
3189 {
3190 return &m_accelerationStructureKHR.get();
3191 }
3192
3193 void TopLevelAccelerationStructureKHR::prepareInstances (const DeviceInterface& vk,
3194 const VkDevice device,
3195 VkAccelerationStructureGeometryKHR& accelerationStructureGeometryKHR,
3196 std::vector<deUint32>& maxPrimitiveCounts)
3197 {
3198 maxPrimitiveCounts.resize(1);
3199 maxPrimitiveCounts[0] = static_cast<deUint32>(m_bottomLevelInstances.size());
3200
3201 VkDeviceOrHostAddressConstKHR instancesData;
3202 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3203 {
3204 if (m_instanceBuffer.get() != DE_NULL)
3205 {
3206 if (m_useArrayOfPointers)
3207 {
3208 deUint8* bufferStart = static_cast<deUint8*>(m_instanceAddressBuffer->getAllocation().getHostPtr());
3209 VkDeviceSize bufferOffset = 0;
3210 VkDeviceOrHostAddressConstKHR firstInstance = makeDeviceOrHostAddressConstKHR(vk, device, m_instanceBuffer->get(), 0);
3211 for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
3212 {
3213 VkDeviceOrHostAddressConstKHR currentInstance;
3214 currentInstance.deviceAddress = firstInstance.deviceAddress + instanceNdx * sizeof(VkAccelerationStructureInstanceKHR);
3215
3216 deMemcpy(&bufferStart[bufferOffset], &currentInstance, sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress));
3217 bufferOffset += sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress);
3218 }
3219 flushMappedMemoryRange(vk, device, m_instanceAddressBuffer->getAllocation().getMemory(), m_instanceAddressBuffer->getAllocation().getOffset(), VK_WHOLE_SIZE);
3220
3221 instancesData = makeDeviceOrHostAddressConstKHR(vk, device, m_instanceAddressBuffer->get(), 0);
3222 }
3223 else
3224 instancesData = makeDeviceOrHostAddressConstKHR(vk, device, m_instanceBuffer->get(), 0);
3225 }
3226 else
3227 instancesData = makeDeviceOrHostAddressConstKHR(DE_NULL);
3228 }
3229 else
3230 {
3231 if (m_instanceBuffer.get() != DE_NULL)
3232 {
3233 if (m_useArrayOfPointers)
3234 {
3235 deUint8* bufferStart = static_cast<deUint8*>(m_instanceAddressBuffer->getAllocation().getHostPtr());
3236 VkDeviceSize bufferOffset = 0;
3237 for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
3238 {
3239 VkDeviceOrHostAddressConstKHR currentInstance;
3240 currentInstance.hostAddress = (deUint8*)m_instanceBuffer->getAllocation().getHostPtr() + instanceNdx * sizeof(VkAccelerationStructureInstanceKHR);
3241
3242 deMemcpy(&bufferStart[bufferOffset], &currentInstance, sizeof(VkDeviceOrHostAddressConstKHR::hostAddress));
3243 bufferOffset += sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
3244 }
3245 instancesData = makeDeviceOrHostAddressConstKHR(m_instanceAddressBuffer->getAllocation().getHostPtr());
3246 }
3247 else
3248 instancesData = makeDeviceOrHostAddressConstKHR(m_instanceBuffer->getAllocation().getHostPtr());
3249 }
3250 else
3251 instancesData = makeDeviceOrHostAddressConstKHR(DE_NULL);
3252 }
3253
3254 VkAccelerationStructureGeometryInstancesDataKHR accelerationStructureGeometryInstancesDataKHR =
3255 {
3256 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, // VkStructureType sType;
3257 DE_NULL, // const void* pNext;
3258 (VkBool32)(m_useArrayOfPointers ? DE_TRUE : DE_FALSE), // VkBool32 arrayOfPointers;
3259 instancesData // VkDeviceOrHostAddressConstKHR data;
3260 };
3261
3262 accelerationStructureGeometryKHR =
3263 {
3264 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // VkStructureType sType;
3265 DE_NULL, // const void* pNext;
3266 VK_GEOMETRY_TYPE_INSTANCES_KHR, // VkGeometryTypeKHR geometryType;
3267 makeVkAccelerationStructureInstancesDataKHR(accelerationStructureGeometryInstancesDataKHR), // VkAccelerationStructureGeometryDataKHR geometry;
3268 (VkGeometryFlagsKHR)0u // VkGeometryFlagsKHR flags;
3269 };
3270 }
3271
3272 deUint32 TopLevelAccelerationStructure::getRequiredAllocationCount (void)
3273 {
3274 return TopLevelAccelerationStructureKHR::getRequiredAllocationCount();
3275 }
3276
3277 de::MovePtr<TopLevelAccelerationStructure> makeTopLevelAccelerationStructure ()
3278 {
3279 return de::MovePtr<TopLevelAccelerationStructure>(new TopLevelAccelerationStructureKHR);
3280 }
3281
3282 bool queryAccelerationStructureSizeKHR (const DeviceInterface& vk,
3283 const VkDevice device,
3284 const VkCommandBuffer cmdBuffer,
3285 const std::vector<VkAccelerationStructureKHR>& accelerationStructureHandles,
3286 VkAccelerationStructureBuildTypeKHR buildType,
3287 const VkQueryPool queryPool,
3288 VkQueryType queryType,
3289 deUint32 firstQuery,
3290 std::vector<VkDeviceSize>& results)
3291 {
3292 DE_ASSERT(queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR || queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR);
3293
3294 if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3295 {
3296 // queryPool must be large enough to contain at least (firstQuery + accelerationStructureHandles.size()) queries
3297 vk.cmdResetQueryPool(cmdBuffer, queryPool, firstQuery, deUint32(accelerationStructureHandles.size()));
3298 vk.cmdWriteAccelerationStructuresPropertiesKHR(cmdBuffer, deUint32(accelerationStructureHandles.size()), accelerationStructureHandles.data(), queryType, queryPool, firstQuery);
3299 // Results cannot be read back on the CPU yet - fetch them with getQueryPoolResults() once the command buffer has executed. Until then this function fills the output vector with zeros.
3300 results.resize(accelerationStructureHandles.size(), 0u);
3301 return false;
3302 }
3303 // buildType != VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR
3304 results.resize(accelerationStructureHandles.size(), 0u);
3305 vk.writeAccelerationStructuresPropertiesKHR(device, deUint32(accelerationStructureHandles.size()), accelerationStructureHandles.data(), queryType,
3306 sizeof(VkDeviceSize) * accelerationStructureHandles.size(), results.data(), sizeof(VkDeviceSize));
3307 // results will contain proper values
3308 return true;
3309 }
3310
3311 bool queryAccelerationStructureSize (const DeviceInterface& vk,
3312 const VkDevice device,
3313 const VkCommandBuffer cmdBuffer,
3314 const std::vector<VkAccelerationStructureKHR>& accelerationStructureHandles,
3315 VkAccelerationStructureBuildTypeKHR buildType,
3316 const VkQueryPool queryPool,
3317 VkQueryType queryType,
3318 deUint32 firstQuery,
3319 std::vector<VkDeviceSize>& results)
3320 {
3321 return queryAccelerationStructureSizeKHR(vk, device, cmdBuffer, accelerationStructureHandles, buildType, queryPool, queryType, firstQuery, results);
3322 }
3323
3324 RayTracingPipeline::RayTracingPipeline ()
3325 : m_shadersModules ()
3326 , m_pipelineLibraries ()
3327 , m_shaderCreateInfos ()
3328 , m_shadersGroupCreateInfos ()
3329 , m_pipelineCreateFlags (0U)
3330 , m_maxRecursionDepth (1U)
3331 , m_maxPayloadSize (0U)
3332 , m_maxAttributeSize (0U)
3333 , m_deferredOperation (false)
3334 , m_workerThreadCount (0)
3335 {
3336 }
3337
3338 RayTracingPipeline::~RayTracingPipeline ()
3339 {
3340 }
3341
3342 #define CHECKED_ASSIGN_SHADER(SHADER, STAGE) \
3343 if (SHADER == VK_SHADER_UNUSED_KHR) \
3344 SHADER = STAGE; \
3345 else \
3346 TCU_THROW(InternalError, "Attempt to reassign shader")
3347
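// The three addShader() overloads below route a shader module into the group-indexed
// VkRayTracingShaderGroupCreateInfoKHR table; CHECKED_ASSIGN_SHADER guards each slot
// against double assignment. Illustrative grouping sketch (module names hypothetical):
//
//   pipeline.addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,      rgenModule, 0u); // group 0: general group
//   pipeline.addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, chitModule, 1u); // group 1: triangles hit group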
3348 void RayTracingPipeline::addShader (VkShaderStageFlagBits shaderStage,
3349 Move<VkShaderModule> shaderModule,
3350 deUint32 group,
3351 const VkSpecializationInfo* specializationInfo,
3352 const VkPipelineShaderStageCreateFlags pipelineShaderStageCreateFlags,
3353 const void* pipelineShaderStageCreateInfopNext)
3354 {
3355 addShader(shaderStage, makeVkSharedPtr(shaderModule), group, specializationInfo, pipelineShaderStageCreateFlags, pipelineShaderStageCreateInfopNext);
3356 }
3357
3358 void RayTracingPipeline::addShader (VkShaderStageFlagBits shaderStage,
3359 de::SharedPtr<Move<VkShaderModule>> shaderModule,
3360 deUint32 group,
3361 const VkSpecializationInfo* specializationInfoPtr,
3362 const VkPipelineShaderStageCreateFlags pipelineShaderStageCreateFlags,
3363 const void* pipelineShaderStageCreateInfopNext)
3364 {
3365 addShader(shaderStage, **shaderModule, group, specializationInfoPtr, pipelineShaderStageCreateFlags, pipelineShaderStageCreateInfopNext);
3366 m_shadersModules.push_back(shaderModule);
3367 }
3368
3369 void RayTracingPipeline::addShader (VkShaderStageFlagBits shaderStage,
3370 VkShaderModule shaderModule,
3371 deUint32 group,
3372 const VkSpecializationInfo* specializationInfoPtr,
3373 const VkPipelineShaderStageCreateFlags pipelineShaderStageCreateFlags,
3374 const void* pipelineShaderStageCreateInfopNext)
3375 {
3376 if (group >= m_shadersGroupCreateInfos.size())
3377 {
3378 for (size_t groupNdx = m_shadersGroupCreateInfos.size(); groupNdx <= group; ++groupNdx)
3379 {
3380 VkRayTracingShaderGroupCreateInfoKHR shaderGroupCreateInfo =
3381 {
3382 VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, // VkStructureType sType;
3383 DE_NULL, // const void* pNext;
3384 VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR, // VkRayTracingShaderGroupTypeKHR type;
3385 VK_SHADER_UNUSED_KHR, // deUint32 generalShader;
3386 VK_SHADER_UNUSED_KHR, // deUint32 closestHitShader;
3387 VK_SHADER_UNUSED_KHR, // deUint32 anyHitShader;
3388 VK_SHADER_UNUSED_KHR, // deUint32 intersectionShader;
3389 DE_NULL, // const void* pShaderGroupCaptureReplayHandle;
3390 };
3391
3392 m_shadersGroupCreateInfos.push_back(shaderGroupCreateInfo);
3393 }
3394 }
3395
3396 const deUint32 shaderStageNdx = (deUint32)m_shaderCreateInfos.size();
3397 VkRayTracingShaderGroupCreateInfoKHR& shaderGroupCreateInfo = m_shadersGroupCreateInfos[group];
3398
3399 switch (shaderStage)
3400 {
3401 case VK_SHADER_STAGE_RAYGEN_BIT_KHR: CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader, shaderStageNdx); break;
3402 case VK_SHADER_STAGE_MISS_BIT_KHR: CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader, shaderStageNdx); break;
3403 case VK_SHADER_STAGE_CALLABLE_BIT_KHR: CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader, shaderStageNdx); break;
3404 case VK_SHADER_STAGE_ANY_HIT_BIT_KHR: CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.anyHitShader, shaderStageNdx); break;
3405 case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR: CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.closestHitShader, shaderStageNdx); break;
3406 case VK_SHADER_STAGE_INTERSECTION_BIT_KHR: CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.intersectionShader, shaderStageNdx); break;
3407 default: TCU_THROW(InternalError, "Unacceptable stage");
3408 }
3409
3410 switch (shaderStage)
3411 {
3412 case VK_SHADER_STAGE_RAYGEN_BIT_KHR:
3413 case VK_SHADER_STAGE_MISS_BIT_KHR:
3414 case VK_SHADER_STAGE_CALLABLE_BIT_KHR:
3415 {
3416 DE_ASSERT(shaderGroupCreateInfo.type == VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR);
3417 shaderGroupCreateInfo.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR;
3418
3419 break;
3420 }
3421
3422 case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:
3423 case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:
3424 case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:
3425 {
3426 DE_ASSERT(shaderGroupCreateInfo.type != VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR);
3427 shaderGroupCreateInfo.type = (shaderGroupCreateInfo.intersectionShader == VK_SHADER_UNUSED_KHR)
3428 ? VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR
3429 : VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR;
3430
3431 break;
3432 }
3433
3434 default: TCU_THROW(InternalError, "Unacceptable stage");
3435 }
3436
3437 {
3438 const VkPipelineShaderStageCreateInfo shaderCreateInfo =
3439 {
3440 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
3441 pipelineShaderStageCreateInfopNext, // const void* pNext;
3442 pipelineShaderStageCreateFlags, // VkPipelineShaderStageCreateFlags flags;
3443 shaderStage, // VkShaderStageFlagBits stage;
3444 shaderModule, // VkShaderModule module;
3445 "main", // const char* pName;
3446 specializationInfoPtr, // const VkSpecializationInfo* pSpecializationInfo;
3447 };
3448
3449 m_shaderCreateInfos.push_back(shaderCreateInfo);
3450 }
3451 }
3452
3453 void RayTracingPipeline::addLibrary (de::SharedPtr<de::MovePtr<RayTracingPipeline>> pipelineLibrary)
3454 {
3455 m_pipelineLibraries.push_back(pipelineLibrary);
3456 }
3457
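// Creates the ray tracing pipeline, optionally through a deferred host operation. When
// VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT is set and the driver
// reports VK_PIPELINE_COMPILE_REQUIRED, a CompileRequiredError is thrown instead of
// returning a pipeline, so callers can handle the compile-required case explicitly.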
3458 Move<VkPipeline> RayTracingPipeline::createPipelineKHR (const DeviceInterface& vk,
3459 const VkDevice device,
3460 const VkPipelineLayout pipelineLayout,
3461 const std::vector<VkPipeline>& pipelineLibraries,
3462 const VkPipelineCache pipelineCache)
3463 {
3464 for (size_t groupNdx = 0; groupNdx < m_shadersGroupCreateInfos.size(); ++groupNdx)
3465 DE_ASSERT(m_shadersGroupCreateInfos[groupNdx].sType == VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR);
3466
3467 VkPipelineLibraryCreateInfoKHR librariesCreateInfo =
3468 {
3469 VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR, // VkStructureType sType;
3470 DE_NULL, // const void* pNext;
3471 de::sizeU32(pipelineLibraries), // deUint32 libraryCount;
3472 de::dataOrNull(pipelineLibraries) // const VkPipeline* pLibraries;
3473 };
3474 const VkRayTracingPipelineInterfaceCreateInfoKHR pipelineInterfaceCreateInfo =
3475 {
3476 VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_INTERFACE_CREATE_INFO_KHR, // VkStructureType sType;
3477 DE_NULL, // const void* pNext;
3478 m_maxPayloadSize, // deUint32 maxPayloadSize;
3479 m_maxAttributeSize // deUint32 maxAttributeSize;
3480 };
3481 const bool addPipelineInterfaceCreateInfo = m_maxPayloadSize != 0 || m_maxAttributeSize != 0;
3482 const VkRayTracingPipelineInterfaceCreateInfoKHR* pipelineInterfaceCreateInfoPtr = addPipelineInterfaceCreateInfo ? &pipelineInterfaceCreateInfo : DE_NULL;
3483 const VkPipelineLibraryCreateInfoKHR* librariesCreateInfoPtr = (pipelineLibraries.empty() ? nullptr : &librariesCreateInfo);
3484
3485 Move<VkDeferredOperationKHR> deferredOperation;
3486 if (m_deferredOperation)
3487 deferredOperation = createDeferredOperationKHR(vk, device);
3488
3489 VkPipelineDynamicStateCreateInfo dynamicStateCreateInfo =
3490 {
3491 VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, // VkStructureType sType;
3492 DE_NULL, // const void* pNext;
3493 0, // VkPipelineDynamicStateCreateFlags flags;
3494 static_cast<deUint32>(m_dynamicStates.size()), // deUint32 dynamicStateCount;
3495 m_dynamicStates.data(), // const VkDynamicState* pDynamicStates;
3496 };
3497
	const VkRayTracingPipelineCreateInfoKHR pipelineCreateInfo =
	{
		VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR,	// VkStructureType sType;
		DE_NULL,													// const void* pNext;
		m_pipelineCreateFlags,										// VkPipelineCreateFlags flags;
		de::sizeU32(m_shaderCreateInfos),							// deUint32 stageCount;
		de::dataOrNull(m_shaderCreateInfos),						// const VkPipelineShaderStageCreateInfo* pStages;
		de::sizeU32(m_shadersGroupCreateInfos),						// deUint32 groupCount;
		de::dataOrNull(m_shadersGroupCreateInfos),					// const VkRayTracingShaderGroupCreateInfoKHR* pGroups;
		m_maxRecursionDepth,										// deUint32 maxPipelineRayRecursionDepth;
		librariesCreateInfoPtr,										// const VkPipelineLibraryCreateInfoKHR* pLibraryInfo;
		pipelineInterfaceCreateInfoPtr,								// const VkRayTracingPipelineInterfaceCreateInfoKHR* pLibraryInterface;
		&dynamicStateCreateInfo,									// const VkPipelineDynamicStateCreateInfo* pDynamicState;
		pipelineLayout,												// VkPipelineLayout layout;
		(VkPipeline)DE_NULL,										// VkPipeline basePipelineHandle;
		0,															// deInt32 basePipelineIndex;
	};
	VkPipeline	object				= DE_NULL;
	VkResult	result				= vk.createRayTracingPipelinesKHR(device, deferredOperation.get(), pipelineCache, 1u, &pipelineCreateInfo, DE_NULL, &object);
	const bool	allowCompileRequired = ((m_pipelineCreateFlags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) != 0);

	if (m_deferredOperation)
	{
		DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS || (allowCompileRequired && result == VK_PIPELINE_COMPILE_REQUIRED));
		finishDeferredOperation(vk, device, deferredOperation.get(), m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
	}

	if (allowCompileRequired && result == VK_PIPELINE_COMPILE_REQUIRED)
		throw CompileRequiredError("createRayTracingPipelinesKHR returned VK_PIPELINE_COMPILE_REQUIRED");

	Move<VkPipeline> pipeline (check<VkPipeline>(object), Deleter<VkPipeline>(vk, device, DE_NULL));
	return pipeline;
}

Move<VkPipeline> RayTracingPipeline::createPipeline (const DeviceInterface&								vk,
													 const VkDevice											device,
													 const VkPipelineLayout									pipelineLayout,
													 const std::vector<de::SharedPtr<Move<VkPipeline>>>&	pipelineLibraries)
{
	std::vector<VkPipeline> rawPipelines;
	rawPipelines.reserve(pipelineLibraries.size());
	for (const auto& lib : pipelineLibraries)
		rawPipelines.push_back(lib.get()->get());

	return createPipelineKHR(vk, device, pipelineLayout, rawPipelines);
}

Move<VkPipeline> RayTracingPipeline::createPipeline (const DeviceInterface&		vk,
													 const VkDevice					device,
													 const VkPipelineLayout			pipelineLayout,
													 const std::vector<VkPipeline>&	pipelineLibraries,
													 const VkPipelineCache			pipelineCache)
{
	return createPipelineKHR(vk, device, pipelineLayout, pipelineLibraries, pipelineCache);
}
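
// Example (illustrative only): a minimal pipeline-creation sequence as a test might write it. The addShader()
// call and the default arguments are assumptions based on the declarations in vkRayTracingUtil.hpp and are not
// defined in this translation unit.
//
//	RayTracingPipeline rtPipeline;
//	rtPipeline.addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR, rgenModule, 0u /* group */);
//	rtPipeline.setMaxRecursionDepth(1u);
//	const Move<VkPipeline> pipeline = rtPipeline.createPipeline(vk, device, *pipelineLayout, std::vector<VkPipeline>());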

std::vector<de::SharedPtr<Move<VkPipeline>>> RayTracingPipeline::createPipelineWithLibraries (const DeviceInterface&	vk,
																							  const VkDevice			device,
																							  const VkPipelineLayout	pipelineLayout)
{
	for (size_t groupNdx = 0; groupNdx < m_shadersGroupCreateInfos.size(); ++groupNdx)
		DE_ASSERT(m_shadersGroupCreateInfos[groupNdx].sType == VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR);

	DE_ASSERT(m_shaderCreateInfos.size() > 0);
	DE_ASSERT(m_shadersGroupCreateInfos.size() > 0);

	std::vector<de::SharedPtr<Move<VkPipeline>>> result, allLibraries, firstLibraries;
	for (const auto& pipelineLibrary : m_pipelineLibraries)
	{
		auto childLibraries = pipelineLibrary->get()->createPipelineWithLibraries(vk, device, pipelineLayout);
		DE_ASSERT(childLibraries.size() > 0);
		firstLibraries.push_back(childLibraries[0]);
		std::copy(begin(childLibraries), end(childLibraries), std::back_inserter(allLibraries));
	}
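	// Element 0 of the returned vector is the pipeline linked from this object's libraries; the remaining
	// elements keep the recursively created library pipelines alive for as long as the linked pipeline is used.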
	result.push_back(makeVkSharedPtr(createPipeline(vk, device, pipelineLayout, firstLibraries)));
	std::copy(begin(allLibraries), end(allLibraries), std::back_inserter(result));
	return result;
}

de::MovePtr<BufferWithMemory> RayTracingPipeline::createShaderBindingTable (const DeviceInterface&		vk,
																			const VkDevice				device,
																			const VkPipeline			pipeline,
																			Allocator&					allocator,
																			const deUint32&				shaderGroupHandleSize,
																			const deUint32				shaderGroupBaseAlignment,
																			const deUint32&				firstGroup,
																			const deUint32&				groupCount,
																			const VkBufferCreateFlags&	additionalBufferCreateFlags,
																			const VkBufferUsageFlags&	additionalBufferUsageFlags,
																			const MemoryRequirement&	additionalMemoryRequirement,
																			const VkDeviceAddress&		opaqueCaptureAddress,
																			const deUint32				shaderBindingTableOffset,
																			const deUint32				shaderRecordSize,
																			const void**				shaderGroupDataPtrPerGroup,
																			const bool					autoAlignRecords)
{
	DE_ASSERT(shaderGroupBaseAlignment != 0u);
	DE_ASSERT((shaderBindingTableOffset % shaderGroupBaseAlignment) == 0);
	DE_UNREF(shaderGroupBaseAlignment);

	const auto					totalEntrySize	= (autoAlignRecords ? deAlign32(shaderGroupHandleSize + shaderRecordSize, shaderGroupHandleSize) : (shaderGroupHandleSize + shaderRecordSize));
	const deUint32				sbtSize			= shaderBindingTableOffset + groupCount * totalEntrySize;
	const VkBufferUsageFlags	sbtFlags		= VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | additionalBufferUsageFlags;
	VkBufferCreateInfo			sbtCreateInfo	= makeBufferCreateInfo(sbtSize, sbtFlags);
	sbtCreateInfo.flags |= additionalBufferCreateFlags;

	const VkBufferOpaqueCaptureAddressCreateInfo sbtCaptureAddressInfo =
	{
		VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO,	// VkStructureType sType;
		DE_NULL,														// const void* pNext;
		deUint64(opaqueCaptureAddress)									// deUint64 opaqueCaptureAddress;
	};

	if (opaqueCaptureAddress != 0u)
	{
		sbtCreateInfo.pNext = &sbtCaptureAddressInfo;
		sbtCreateInfo.flags |= VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
	}

	const MemoryRequirement			sbtMemRequirements	= MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress | additionalMemoryRequirement;
	de::MovePtr<BufferWithMemory>	sbtBuffer			= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, sbtCreateInfo, sbtMemRequirements));
	vk::Allocation&					sbtAlloc			= sbtBuffer->getAllocation();

	// Collect the shader group handles for the requested group range.
	std::vector<deUint8> shaderHandles (groupCount * shaderGroupHandleSize);
	VK_CHECK(getRayTracingShaderGroupHandles(vk, device, pipeline, firstGroup, groupCount, groupCount * shaderGroupHandleSize, shaderHandles.data()));

	// Copy each handle into place, leaving room after it for the (possibly empty) ShaderRecordKHR data and for
	// any alignment padding.
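	// Resulting layout per group (with autoAlignRecords, handle + record is padded to a multiple of the handle size):
	//   [ group handle | shader record | padding ][ group handle | shader record | padding ] ...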
	deUint8* shaderBegin = (deUint8*)sbtAlloc.getHostPtr() + shaderBindingTableOffset;
	for (deUint32 idx = 0; idx < groupCount; ++idx)
	{
		const deUint8*	shaderSrcPos = shaderHandles.data() + idx * shaderGroupHandleSize;
		deUint8*		shaderDstPos = shaderBegin + idx * totalEntrySize;
		deMemcpy(shaderDstPos, shaderSrcPos, shaderGroupHandleSize);

		if (shaderGroupDataPtrPerGroup != nullptr &&
			shaderGroupDataPtrPerGroup[idx] != nullptr)
		{
			DE_ASSERT(sbtSize >= static_cast<deUint32>(shaderDstPos - shaderBegin) + shaderGroupHandleSize);

			deMemcpy(shaderDstPos + shaderGroupHandleSize,
					 shaderGroupDataPtrPerGroup[idx],
					 shaderRecordSize);
		}
	}

	flushMappedMemoryRange(vk, device, sbtAlloc.getMemory(), sbtAlloc.getOffset(), VK_WHOLE_SIZE);

	return sbtBuffer;
}
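
// Example (illustrative only): turning the buffer returned above into the strided regions consumed by
// cmdTraceRays(). getBufferDeviceAddress() and makeStridedDeviceAddressRegionKHR() are assumed to be the
// helpers declared in vkRayTracingUtil.hpp; the single-group raygen table below is a placeholder.
//
//	const VkDeviceAddress					sbtAddress	= getBufferDeviceAddress(vk, device, sbt->get(), 0);
//	const VkStridedDeviceAddressRegionKHR	rgenRegion	= makeStridedDeviceAddressRegionKHR(sbtAddress, shaderGroupHandleSize, shaderGroupHandleSize);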

void RayTracingPipeline::setCreateFlags (const VkPipelineCreateFlags& pipelineCreateFlags)
{
	m_pipelineCreateFlags = pipelineCreateFlags;
}

void RayTracingPipeline::setMaxRecursionDepth (const deUint32& maxRecursionDepth)
{
	m_maxRecursionDepth = maxRecursionDepth;
}

void RayTracingPipeline::setMaxPayloadSize (const deUint32& maxPayloadSize)
{
	m_maxPayloadSize = maxPayloadSize;
}

void RayTracingPipeline::setMaxAttributeSize (const deUint32& maxAttributeSize)
{
	m_maxAttributeSize = maxAttributeSize;
}

void RayTracingPipeline::setDeferredOperation (const bool		deferredOperation,
											   const deUint32	workerThreadCount)
{
	m_deferredOperation	= deferredOperation;
	m_workerThreadCount	= workerThreadCount;
}

void RayTracingPipeline::addDynamicState (const VkDynamicState& dynamicState)
{
	m_dynamicStates.push_back(dynamicState);
}

class RayTracingPropertiesKHR : public RayTracingProperties
{
public:
				RayTracingPropertiesKHR		() = delete;
				RayTracingPropertiesKHR		(const InstanceInterface&	vki,
											 const VkPhysicalDevice		physicalDevice);
	virtual		~RayTracingPropertiesKHR	();

	uint32_t	getShaderGroupHandleSize					(void) override	{ return m_rayTracingPipelineProperties.shaderGroupHandleSize; }
	uint32_t	getShaderGroupHandleAlignment				(void) override	{ return m_rayTracingPipelineProperties.shaderGroupHandleAlignment; }
	uint32_t	getMaxRecursionDepth						(void) override	{ return m_rayTracingPipelineProperties.maxRayRecursionDepth; }
	uint32_t	getMaxShaderGroupStride						(void) override	{ return m_rayTracingPipelineProperties.maxShaderGroupStride; }
	uint32_t	getShaderGroupBaseAlignment					(void) override	{ return m_rayTracingPipelineProperties.shaderGroupBaseAlignment; }
	uint64_t	getMaxGeometryCount							(void) override	{ return m_accelerationStructureProperties.maxGeometryCount; }
	uint64_t	getMaxInstanceCount							(void) override	{ return m_accelerationStructureProperties.maxInstanceCount; }
	uint64_t	getMaxPrimitiveCount						(void) override	{ return m_accelerationStructureProperties.maxPrimitiveCount; }
	uint32_t	getMaxDescriptorSetAccelerationStructures	(void) override	{ return m_accelerationStructureProperties.maxDescriptorSetAccelerationStructures; }
	uint32_t	getMaxRayDispatchInvocationCount			(void) override	{ return m_rayTracingPipelineProperties.maxRayDispatchInvocationCount; }
	uint32_t	getMaxRayHitAttributeSize					(void) override	{ return m_rayTracingPipelineProperties.maxRayHitAttributeSize; }
	uint32_t	getMaxMemoryAllocationCount					(void) override	{ return m_maxMemoryAllocationCount; }

protected:
	VkPhysicalDeviceAccelerationStructurePropertiesKHR	m_accelerationStructureProperties;
	VkPhysicalDeviceRayTracingPipelinePropertiesKHR		m_rayTracingPipelineProperties;
	deUint32											m_maxMemoryAllocationCount;
};

RayTracingPropertiesKHR::~RayTracingPropertiesKHR ()
{
}

RayTracingPropertiesKHR::RayTracingPropertiesKHR (const InstanceInterface&	vki,
												  const VkPhysicalDevice	physicalDevice)
	: RayTracingProperties (vki, physicalDevice)
{
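	// Note: getPhysicalDeviceExtensionProperties() (vkQueryUtil) appears to deduce the queried structure, and
	// hence its sType, from the type of the member it is assigned to, so the two calls below fill different
	// extension property structs from the same physical device.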
	m_accelerationStructureProperties	= getPhysicalDeviceExtensionProperties(vki, physicalDevice);
	m_rayTracingPipelineProperties		= getPhysicalDeviceExtensionProperties(vki, physicalDevice);
	m_maxMemoryAllocationCount			= getPhysicalDeviceProperties(vki, physicalDevice).limits.maxMemoryAllocationCount;
}

de::MovePtr<RayTracingProperties> makeRayTracingProperties (const InstanceInterface&	vki,
															const VkPhysicalDevice		physicalDevice)
{
	return de::MovePtr<RayTracingProperties>(new RayTracingPropertiesKHR(vki, physicalDevice));
}

static inline void cmdTraceRaysKHR (const DeviceInterface&					vk,
									VkCommandBuffer							commandBuffer,
									const VkStridedDeviceAddressRegionKHR*	raygenShaderBindingTableRegion,
									const VkStridedDeviceAddressRegionKHR*	missShaderBindingTableRegion,
									const VkStridedDeviceAddressRegionKHR*	hitShaderBindingTableRegion,
									const VkStridedDeviceAddressRegionKHR*	callableShaderBindingTableRegion,
									deUint32								width,
									deUint32								height,
									deUint32								depth)
{
	return vk.cmdTraceRaysKHR(commandBuffer,
							  raygenShaderBindingTableRegion,
							  missShaderBindingTableRegion,
							  hitShaderBindingTableRegion,
							  callableShaderBindingTableRegion,
							  width,
							  height,
							  depth);
}

void cmdTraceRays (const DeviceInterface&					vk,
				   VkCommandBuffer							commandBuffer,
				   const VkStridedDeviceAddressRegionKHR*	raygenShaderBindingTableRegion,
				   const VkStridedDeviceAddressRegionKHR*	missShaderBindingTableRegion,
				   const VkStridedDeviceAddressRegionKHR*	hitShaderBindingTableRegion,
				   const VkStridedDeviceAddressRegionKHR*	callableShaderBindingTableRegion,
				   deUint32									width,
				   deUint32									height,
				   deUint32									depth)
{
	DE_ASSERT(raygenShaderBindingTableRegion	!= DE_NULL);
	DE_ASSERT(missShaderBindingTableRegion		!= DE_NULL);
	DE_ASSERT(hitShaderBindingTableRegion		!= DE_NULL);
	DE_ASSERT(callableShaderBindingTableRegion	!= DE_NULL);

	return cmdTraceRaysKHR(vk,
						   commandBuffer,
						   raygenShaderBindingTableRegion,
						   missShaderBindingTableRegion,
						   hitShaderBindingTableRegion,
						   callableShaderBindingTableRegion,
						   width,
						   height,
						   depth);
}
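
// Note: all four region pointers must be non-null even for stages a test does not use; an unused table is
// expressed as an all-zero region, e.g. makeStridedDeviceAddressRegionKHR(0, 0, 0) from vkRayTracingUtil.hpp.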

static inline void cmdTraceRaysIndirectKHR (const DeviceInterface&					vk,
											VkCommandBuffer							commandBuffer,
											const VkStridedDeviceAddressRegionKHR*	raygenShaderBindingTableRegion,
											const VkStridedDeviceAddressRegionKHR*	missShaderBindingTableRegion,
											const VkStridedDeviceAddressRegionKHR*	hitShaderBindingTableRegion,
											const VkStridedDeviceAddressRegionKHR*	callableShaderBindingTableRegion,
											VkDeviceAddress							indirectDeviceAddress)
{
	DE_ASSERT(raygenShaderBindingTableRegion	!= DE_NULL);
	DE_ASSERT(missShaderBindingTableRegion		!= DE_NULL);
	DE_ASSERT(hitShaderBindingTableRegion		!= DE_NULL);
	DE_ASSERT(callableShaderBindingTableRegion	!= DE_NULL);
	DE_ASSERT(indirectDeviceAddress				!= 0);

	return vk.cmdTraceRaysIndirectKHR(commandBuffer,
									  raygenShaderBindingTableRegion,
									  missShaderBindingTableRegion,
									  hitShaderBindingTableRegion,
									  callableShaderBindingTableRegion,
									  indirectDeviceAddress);
}

void cmdTraceRaysIndirect (const DeviceInterface&					vk,
						   VkCommandBuffer							commandBuffer,
						   const VkStridedDeviceAddressRegionKHR*	raygenShaderBindingTableRegion,
						   const VkStridedDeviceAddressRegionKHR*	missShaderBindingTableRegion,
						   const VkStridedDeviceAddressRegionKHR*	hitShaderBindingTableRegion,
						   const VkStridedDeviceAddressRegionKHR*	callableShaderBindingTableRegion,
						   VkDeviceAddress							indirectDeviceAddress)
{
	return cmdTraceRaysIndirectKHR(vk,
								   commandBuffer,
								   raygenShaderBindingTableRegion,
								   missShaderBindingTableRegion,
								   hitShaderBindingTableRegion,
								   callableShaderBindingTableRegion,
								   indirectDeviceAddress);
}
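
// The indirect variants read their dispatch parameters from device memory: cmdTraceRaysIndirect() expects a
// VkTraceRaysIndirectCommandKHR (width/height/depth) at indirectDeviceAddress, while cmdTraceRaysIndirect2(),
// added by VK_KHR_ray_tracing_maintenance1, expects a VkTraceRaysIndirectCommand2KHR that also embeds the four
// shader binding table regions.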

static inline void cmdTraceRaysIndirect2KHR (const DeviceInterface&	vk,
											 VkCommandBuffer		commandBuffer,
											 VkDeviceAddress		indirectDeviceAddress)
{
	DE_ASSERT(indirectDeviceAddress != 0);

	return vk.cmdTraceRaysIndirect2KHR(commandBuffer, indirectDeviceAddress);
}

void cmdTraceRaysIndirect2 (const DeviceInterface&	vk,
							VkCommandBuffer			commandBuffer,
							VkDeviceAddress			indirectDeviceAddress)
{
	return cmdTraceRaysIndirect2KHR(vk, commandBuffer, indirectDeviceAddress);
}

#else

deUint32 rayTracingDefineAnything ()
{
	return 0;
}

#endif // CTS_USES_VULKANSC

} // vk