/*
 * Copyright © 2022 Friedrich Vock
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "meta/radv_meta.h"
#include "util/u_process.h"
#include "radv_private.h"
#include "vk_acceleration_structure.h"
#include "vk_common_entrypoints.h"
#include "wsi_common_entrypoints.h"

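/*
 * RRA (Radeon Raytracing Analyzer) capture layer. These entrypoints wrap the
 * driver's own implementation (reached through layer_dispatch.rra) and track
 * every acceleration structure the application creates, builds, copies or
 * destroys, so that a complete snapshot can be serialized into an .rra file
 * once a capture is triggered. The trigger itself is set elsewhere in the
 * driver; with upstream RADV this is typically requested via the
 * RADV_RRA_TRACE debug variable (a hedged usage sketch, not defined in this
 * file):
 *
 *    RADV_RRA_TRACE=<frame number> ./app
 */

/*
 * Present marks the frame boundary. If a capture was triggered, dump every
 * tracked acceleration structure to /tmp/<process>_<timestamp>.rra before
 * forwarding the present, then reset the per-frame ray-history state and
 * reap structures that were kept alive only for their shadow copies.
 */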
VKAPI_ATTR VkResult VKAPI_CALL
rra_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)
{
   RADV_FROM_HANDLE(radv_queue, queue, _queue);

   if (queue->device->rra_trace.triggered) {
      queue->device->rra_trace.triggered = false;

      if (_mesa_hash_table_num_entries(queue->device->rra_trace.accel_structs) == 0) {
         fprintf(stderr, "radv: No acceleration structures captured, not saving RRA trace.\n");
      } else {
         char filename[2048];
         time_t t = time(NULL);
         struct tm now = *localtime(&t);
         snprintf(filename, sizeof(filename), "/tmp/%s_%04d.%02d.%02d_%02d.%02d.%02d.rra", util_get_process_name(),
                  1900 + now.tm_year, now.tm_mon + 1, now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec);

         VkResult result = radv_rra_dump_trace(_queue, filename);
         if (result == VK_SUCCESS)
            fprintf(stderr, "radv: RRA capture saved to '%s'\n", filename);
         else
            fprintf(stderr, "radv: Failed to save RRA capture!\n");
      }
   }

   VkResult result = queue->device->layer_dispatch.rra.QueuePresentKHR(_queue, pPresentInfo);
   if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR)
      return result;

   VkDevice _device = radv_device_to_handle(queue->device);
   radv_rra_trace_clear_ray_history(_device, &queue->device->rra_trace);

   if (queue->device->rra_trace.triggered) {
      result = queue->device->layer_dispatch.rra.DeviceWaitIdle(_device);
      if (result != VK_SUCCESS)
         return result;

      struct radv_ray_history_header *header = queue->device->rra_trace.ray_history_data;
      header->offset = sizeof(struct radv_ray_history_header);
   }

   if (!queue->device->rra_trace.copy_after_build)
      return VK_SUCCESS;

   struct hash_table *accel_structs = queue->device->rra_trace.accel_structs;

   hash_table_foreach (accel_structs, entry) {
      struct radv_rra_accel_struct_data *data = entry->data;
      if (!data->is_dead)
         continue;

      radv_destroy_rra_accel_struct_data(_device, data);
      _mesa_hash_table_remove(accel_structs, entry);
   }

   return VK_SUCCESS;
}

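/*
 * Allocate the shadow buffer that receives a stable copy of one acceleration
 * structure when copy_after_build is enabled. The buffer is bound to freshly
 * allocated memory of the type selected at trace init (copy_memory_index);
 * on failure, everything created so far is unwound.
 */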
static VkResult
rra_init_accel_struct_data_buffer(VkDevice vk_device, struct radv_rra_accel_struct_data *data)
{
   RADV_FROM_HANDLE(radv_device, device, vk_device);
   VkBufferCreateInfo buffer_create_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .size = data->size,
   };

   VkResult result = radv_create_buffer(device, &buffer_create_info, NULL, &data->buffer, true);
   if (result != VK_SUCCESS)
      return result;

   VkMemoryRequirements requirements;
   vk_common_GetBufferMemoryRequirements(vk_device, data->buffer, &requirements);

   VkMemoryAllocateFlagsInfo flags_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
      .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT,
   };

   VkMemoryAllocateInfo alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .pNext = &flags_info,
      .allocationSize = requirements.size,
      .memoryTypeIndex = device->rra_trace.copy_memory_index,
   };
   result = radv_alloc_memory(device, &alloc_info, NULL, &data->memory, true);
   if (result != VK_SUCCESS)
      goto fail_buffer;

   result = vk_common_BindBufferMemory(vk_device, data->buffer, data->memory, 0);
   if (result != VK_SUCCESS)
      goto fail_memory;

   return result;
fail_memory:
   radv_FreeMemory(vk_device, data->memory, NULL);
fail_buffer:
   radv_DestroyBuffer(vk_device, data->buffer, NULL);
   return result;
}

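/*
 * Wrap acceleration-structure creation: after the driver creates the object,
 * record it in the accel_structs table together with an event that is
 * signaled once the structure has actually been built on the GPU. The VA is
 * recorded up front only if the backing buffer already has a BO; otherwise
 * it is filled in lazily by handle_accel_struct_write(). On any failure the
 * structure is destroyed again so the application never sees a half-tracked
 * handle.
 */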
VKAPI_ATTR VkResult VKAPI_CALL
rra_CreateAccelerationStructureKHR(VkDevice _device, const VkAccelerationStructureCreateInfoKHR *pCreateInfo,
                                   const VkAllocationCallbacks *pAllocator,
                                   VkAccelerationStructureKHR *pAccelerationStructure)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);

   VkResult result = device->layer_dispatch.rra.CreateAccelerationStructureKHR(_device, pCreateInfo, pAllocator,
                                                                               pAccelerationStructure);

   if (result != VK_SUCCESS)
      return result;

   RADV_FROM_HANDLE(vk_acceleration_structure, structure, *pAccelerationStructure);
   simple_mtx_lock(&device->rra_trace.data_mtx);

   struct radv_rra_accel_struct_data *data = calloc(1, sizeof(struct radv_rra_accel_struct_data));
   if (!data) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail_as;
   }

   data->va = buffer->bo ? vk_acceleration_structure_get_va(structure) : 0;
   data->size = structure->size;
   data->type = pCreateInfo->type;
   data->is_dead = false;

   VkEventCreateInfo eventCreateInfo = {
      .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
   };

   result = radv_create_event(device, &eventCreateInfo, NULL, &data->build_event, true);
   if (result != VK_SUCCESS)
      goto fail_data;

   if (device->rra_trace.copy_after_build) {
      result = rra_init_accel_struct_data_buffer(_device, data);
      if (result != VK_SUCCESS)
         goto fail_event;
   }

   _mesa_hash_table_insert(device->rra_trace.accel_structs, structure, data);

   if (data->va)
      _mesa_hash_table_u64_insert(device->rra_trace.accel_struct_vas, data->va, structure);

   goto exit;
fail_event:
   radv_DestroyEvent(_device, data->build_event, NULL);
fail_data:
   free(data);
fail_as:
   device->layer_dispatch.rra.DestroyAccelerationStructureKHR(_device, *pAccelerationStructure, pAllocator);
   *pAccelerationStructure = VK_NULL_HANDLE;
exit:
   simple_mtx_unlock(&device->rra_trace.data_mtx);
   return result;
}

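/*
 * Shared bookkeeping for anything that writes a complete acceleration
 * structure: make the AS write visible to transfer reads, signal the build
 * event, record the VA if it was not known at creation time and, when a
 * shadow buffer exists, snapshot the structure into it while its contents
 * are known to be valid.
 */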
static void
handle_accel_struct_write(VkCommandBuffer commandBuffer, struct vk_acceleration_structure *accel_struct,
                          struct radv_rra_accel_struct_data *data)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);

   VkMemoryBarrier2 barrier = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
      .srcStageMask = VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
      .srcAccessMask = VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
      .dstStageMask = VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT,
      .dstAccessMask = VK_ACCESS_2_TRANSFER_READ_BIT,
   };

   VkDependencyInfo dependencyInfo = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .memoryBarrierCount = 1,
      .pMemoryBarriers = &barrier,
   };

   radv_CmdPipelineBarrier2(commandBuffer, &dependencyInfo);

   vk_common_CmdSetEvent(commandBuffer, data->build_event, 0);

   if (!data->va) {
      data->va = vk_acceleration_structure_get_va(accel_struct);
      _mesa_hash_table_u64_insert(cmd_buffer->device->rra_trace.accel_struct_vas, data->va, accel_struct);
   }

   if (!data->buffer)
      return;

   VkBufferCopy2 region = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2,
      .srcOffset = accel_struct->offset,
      .size = accel_struct->size,
   };

   VkCopyBufferInfo2 copyInfo = {
      .sType = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2,
      .srcBuffer = accel_struct->buffer,
      .dstBuffer = data->buffer,
      .regionCount = 1,
      .pRegions = &region,
   };

   radv_CmdCopyBuffer2(commandBuffer, &copyInfo);
}

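/*
 * Forward the build, then run the write bookkeeping on every destination
 * structure in the batch. Each destination must already be tracked, since it
 * had to pass through rra_CreateAccelerationStructureKHR, hence the assert.
 */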
VKAPI_ATTR void VKAPI_CALL
rra_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
                                      const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
                                      const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   cmd_buffer->device->layer_dispatch.rra.CmdBuildAccelerationStructuresKHR(commandBuffer, infoCount, pInfos,
                                                                            ppBuildRangeInfos);

   simple_mtx_lock(&cmd_buffer->device->rra_trace.data_mtx);
   for (uint32_t i = 0; i < infoCount; ++i) {
      RADV_FROM_HANDLE(vk_acceleration_structure, structure, pInfos[i].dstAccelerationStructure);
      struct hash_entry *entry = _mesa_hash_table_search(cmd_buffer->device->rra_trace.accel_structs, structure);

      assert(entry);
      struct radv_rra_accel_struct_data *data = entry->data;

      handle_accel_struct_write(commandBuffer, structure, data);
   }
   simple_mtx_unlock(&cmd_buffer->device->rra_trace.data_mtx);
}

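/*
 * A copy produces a complete acceleration structure at pInfo->dst, so the
 * destination is tracked exactly like a freshly built one.
 */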
VKAPI_ATTR void VKAPI_CALL
rra_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   cmd_buffer->device->layer_dispatch.rra.CmdCopyAccelerationStructureKHR(commandBuffer, pInfo);

   simple_mtx_lock(&cmd_buffer->device->rra_trace.data_mtx);

   RADV_FROM_HANDLE(vk_acceleration_structure, structure, pInfo->dst);
   struct hash_entry *entry = _mesa_hash_table_search(cmd_buffer->device->rra_trace.accel_structs, structure);

   assert(entry);
   struct radv_rra_accel_struct_data *data = entry->data;

   handle_accel_struct_write(commandBuffer, structure, data);

   simple_mtx_unlock(&cmd_buffer->device->rra_trace.data_mtx);
}

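/*
 * Deserializing from memory likewise yields a complete structure at
 * pInfo->dst and gets the same treatment.
 */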
VKAPI_ATTR void VKAPI_CALL
rra_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
                                            const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   cmd_buffer->device->layer_dispatch.rra.CmdCopyMemoryToAccelerationStructureKHR(commandBuffer, pInfo);

   simple_mtx_lock(&cmd_buffer->device->rra_trace.data_mtx);

   RADV_FROM_HANDLE(vk_acceleration_structure, structure, pInfo->dst);
   struct hash_entry *entry = _mesa_hash_table_search(cmd_buffer->device->rra_trace.accel_structs, structure);

   assert(entry);
   struct radv_rra_accel_struct_data *data = entry->data;

   handle_accel_struct_write(commandBuffer, structure, data);

   simple_mtx_unlock(&cmd_buffer->device->rra_trace.data_mtx);
}

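/*
 * With copy_after_build the shadow copy has to stay dumpable after the
 * application destroys its structure, so the entry is only marked dead here
 * and reaped at the next present; otherwise the entry is dropped from the
 * table immediately. The driver object is destroyed either way.
 */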
VKAPI_ATTR void VKAPI_CALL
rra_DestroyAccelerationStructureKHR(VkDevice _device, VkAccelerationStructureKHR _structure,
                                    const VkAllocationCallbacks *pAllocator)
{
   if (!_structure)
      return;

   RADV_FROM_HANDLE(radv_device, device, _device);
   simple_mtx_lock(&device->rra_trace.data_mtx);

   RADV_FROM_HANDLE(vk_acceleration_structure, structure, _structure);

   struct hash_entry *entry = _mesa_hash_table_search(device->rra_trace.accel_structs, structure);

   assert(entry);
   struct radv_rra_accel_struct_data *data = entry->data;

   if (device->rra_trace.copy_after_build)
      data->is_dead = true;
   else
      _mesa_hash_table_remove(device->rra_trace.accel_structs, entry);

   simple_mtx_unlock(&device->rra_trace.data_mtx);

   device->layer_dispatch.rra.DestroyAccelerationStructureKHR(_device, _structure, pAllocator);
}

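/*
 * While a capture is in progress, gather the ray-history records that each
 * submitted command buffer produced into the device-wide list. If anything
 * was gathered, wait for the submission to drain so the GPU-written history
 * buffer is complete, then advance the base index for later submits.
 */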
VKAPI_ATTR VkResult VKAPI_CALL
rra_QueueSubmit2KHR(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits, VkFence _fence)
{
   RADV_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = queue->device;

   VkResult result = device->layer_dispatch.rra.QueueSubmit2KHR(_queue, submitCount, pSubmits, _fence);
   if (result != VK_SUCCESS || !device->rra_trace.triggered)
      return result;

   uint32_t total_trace_count = 0;

   simple_mtx_lock(&device->rra_trace.data_mtx);

   for (uint32_t submit_index = 0; submit_index < submitCount; submit_index++) {
      for (uint32_t i = 0; i < pSubmits[submit_index].commandBufferInfoCount; i++) {
         RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pSubmits[submit_index].pCommandBufferInfos[i].commandBuffer);
         uint32_t trace_count =
            util_dynarray_num_elements(&cmd_buffer->ray_history, struct radv_rra_ray_history_data *);
         if (!trace_count)
            continue;

         total_trace_count += trace_count;
         util_dynarray_append_dynarray(&device->rra_trace.ray_history, &cmd_buffer->ray_history);
      }
   }

   if (!total_trace_count) {
      simple_mtx_unlock(&device->rra_trace.data_mtx);
      return result;
   }

   result = device->layer_dispatch.rra.DeviceWaitIdle(radv_device_to_handle(device));

   struct radv_ray_history_header *header = device->rra_trace.ray_history_data;
   header->submit_base_index += total_trace_count;

   simple_mtx_unlock(&device->rra_trace.data_mtx);

   return result;
}