/*
 * Copyright © 2022 Friedrich Vock
 *
 * SPDX-License-Identifier: MIT
 */

#ifndef _WIN32
#include <dirent.h>
#include <unistd.h>
#endif
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include "ac_gpu_info.h"
#include "radv_buffer.h"
#include "radv_descriptor_set.h"
#include "radv_device_memory.h"
#include "radv_event.h"
#include "radv_image.h"
#include "radv_pipeline_graphics.h"
#include "radv_pipeline_rt.h"
#include "radv_query.h"
#include "radv_rmv.h"

#define RADV_FTRACE_INSTANCE_PATH "/sys/kernel/tracing/instances/amd_rmv"

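/* Opens one of the control files that belong to an amdgpu ftrace event inside
 * the amd_rmv tracing instance (e.g. events/amdgpu/<event_name>/enable). */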
static FILE *
open_event_file(const char *event_name, const char *event_filename, const char *mode)
{
   char filename[2048];
   snprintf(filename, sizeof(filename), RADV_FTRACE_INSTANCE_PATH "/events/amdgpu/%s/%s", event_name, event_filename);
   return fopen(filename, mode);
}

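/* Enables or disables collection of the given amdgpu trace event by writing to
 * its "enable" control file. Returns false if the file can't be written. */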
static bool
set_event_tracing_enabled(const char *event_name, bool enabled)
{
   FILE *file = open_event_file(event_name, "enable", "w");
   if (!file)
      return false;

   size_t written_bytes = fwrite(enabled ? "1" : "0", 1, 1, file);
   fclose(file);
   return written_bytes == 1;
}

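/* Reads the numeric ID the kernel assigned to the given trace event; the ID
 * identifies the event's records in the raw ring buffer. Returns 0xFFFF on
 * failure. */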
static uint16_t
trace_event_id(const char *event_name)
{
   /* id is 16-bit, so <= 65535 */
   char data[6];

   FILE *file = open_event_file(event_name, "id", "r");
   if (!file)
      return (uint16_t)~0;

   size_t read_bytes = fread(data, 1, 6, file);
   fclose(file);

   if (!read_bytes)
      return (uint16_t)~0;

   return (uint16_t)strtoul(data, NULL, 10);
}

static void
open_trace_pipe(uint32_t cpu_index, int *dst_fd)
{
#ifdef _WIN32
   *dst_fd = -1;
#else
   char filename[2048];
   snprintf(filename, sizeof(filename), RADV_FTRACE_INSTANCE_PATH "/per_cpu/cpu%d/trace_pipe_raw", cpu_index);
   /* I/O to the pipe needs to be non-blocking, otherwise reading all available
    * data would block indefinitely by waiting for more data to be written to the pipe */
   *dst_fd = open(filename, O_RDONLY | O_NONBLOCK);
#endif
}

/*
 * Kernel trace buffer parsing
 */

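/* The structs below mirror the binary layout of ftrace ring-buffer pages and
 * event records as read from trace_pipe_raw (see include/linux/ring_buffer.h
 * in the kernel source). */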
struct trace_page_header {
   uint64_t timestamp;
   int32_t commit;
};

enum trace_event_type { TRACE_EVENT_TYPE_PADDING = 29, TRACE_EVENT_TYPE_EXTENDED_DELTA, TRACE_EVENT_TYPE_TIMESTAMP };

struct trace_event_header {
   uint32_t type_len : 5;
   uint32_t time_delta : 27;
   /* Only present if length is too big for type_len */
   uint32_t excess_length;
};

struct trace_event_common {
   unsigned short type;
   unsigned char flags;
   unsigned char preempt_count;
   int pid;
};

struct trace_event_amdgpu_vm_update_ptes {
   struct trace_event_common common;
   uint64_t start;
   uint64_t end;
   uint64_t flags;
   unsigned int num_ptes;
   uint64_t incr;
   int pid;
   uint64_t vm_ctx;
};

/* Represents a dynamic array of addresses in the ftrace buffer. */
struct trace_event_address_array {
   uint16_t data_size;
   uint16_t reserved;
   char data[];
};

/* Possible flags for PTEs, taken from amdgpu_vm.h */
#define AMDGPU_PTE_VALID  (1ULL << 0)
#define AMDGPU_PTE_SYSTEM (1ULL << 1)
#define AMDGPU_PTE_PRT    (1ULL << 51)

/* The minimum size of a GPU page */
#define MIN_GPU_PAGE_SIZE 4096

static void
emit_page_table_update_event(struct vk_memory_trace_data *data, bool is_apu, uint64_t timestamp,
                             struct trace_event_amdgpu_vm_update_ptes *event, uint64_t *addrs, unsigned int pte_index)
{
   struct vk_rmv_token token;

   uint64_t end_addr;
   /* There may be more updated PTEs than the ones reported in the ftrace buffer.
    * We choose the reported end virtual address here to report the correct total committed memory. */
   if (pte_index == event->num_ptes - 1)
      end_addr = event->end;
   else
      end_addr = event->start + (pte_index + 1) * (event->incr / MIN_GPU_PAGE_SIZE);
   uint64_t start_addr = event->start + pte_index * (event->incr / MIN_GPU_PAGE_SIZE);

   token.type = VK_RMV_TOKEN_TYPE_PAGE_TABLE_UPDATE;
   token.timestamp = timestamp;
   token.data.page_table_update.type = VK_RMV_PAGE_TABLE_UPDATE_TYPE_UPDATE;
   token.data.page_table_update.page_size = event->incr;
   token.data.page_table_update.page_count = (end_addr - start_addr) * MIN_GPU_PAGE_SIZE / event->incr;
   token.data.page_table_update.pid = event->common.pid;
   token.data.page_table_update.virtual_address = event->start * MIN_GPU_PAGE_SIZE + pte_index * event->incr;
   /* RMV expects mappings to system memory to have a physical address of 0.
    * Even with traces generated by AMDGPU-PRO, on APUs without dedicated VRAM everything seems to
    * be marked as "committed to system memory". */
   token.data.page_table_update.physical_address = event->flags & AMDGPU_PTE_SYSTEM || is_apu ? 0 : addrs[pte_index];

   token.data.page_table_update.is_unmap = !(event->flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT));
   util_dynarray_append(&data->tokens, struct vk_rmv_token, token);
}

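/* Converts a single amdgpu_vm_update_ptes event into RMV page-table-update
 * tokens, skipping events that don't belong to the current process. */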
static void
evaluate_trace_event(struct radv_device *device, uint64_t timestamp, struct util_dynarray *tokens,
                     struct trace_event_amdgpu_vm_update_ptes *event)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   if (event->common.pid != getpid() && event->pid != getpid()) {
      return;
   }

   struct trace_event_address_array *array = (struct trace_event_address_array *)(event + 1);

   for (uint32_t i = 0; i < event->num_ptes; ++i)
      emit_page_table_update_event(&device->vk.memory_trace_data, !pdev->info.has_dedicated_vram, timestamp, event,
                                   (uint64_t *)array->data, i);
}

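/* Drains all complete pages currently available in one CPU's raw trace pipe
 * and appends RMV tokens for every amdgpu_vm_update_ptes event found. */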
static void
append_trace_events(struct radv_device *device, int pipe_fd)
{
   /* Assuming 4KB if os_get_page_size fails. */
   uint64_t page_size = 4096;
   os_get_page_size(&page_size);

   uint64_t timestamp;

   /*
    * Parse the trace ring buffer page by page.
    */
   char *page = (char *)malloc(page_size);
   if (!page) {
      return;
   }
   int64_t read_bytes;
   do {
      read_bytes = (int64_t)read(pipe_fd, page, page_size);
      if (read_bytes < (int64_t)sizeof(struct trace_page_header))
         break;

      struct trace_page_header *page_header = (struct trace_page_header *)page;
      timestamp = page_header->timestamp;

      size_t data_size = MIN2((size_t)read_bytes, (size_t)page_header->commit);

      char *read_ptr = page + sizeof(struct trace_page_header);
      while (read_ptr - page < data_size) {
         struct trace_event_header *event_header = (struct trace_event_header *)read_ptr;
         read_ptr += sizeof(struct trace_event_header);

         /* Handle special event type, see include/linux/ring_buffer.h in the
          * kernel source */
         switch (event_header->type_len) {
         case TRACE_EVENT_TYPE_PADDING:
            if (event_header->time_delta) {
               /* Specified size, skip past padding */
               read_ptr += event_header->excess_length;
               timestamp += event_header->time_delta;
               continue;
            } else {
               /* Padding is until end of page, skip until next page */
               read_ptr = page + data_size;
               continue;
            }
         case TRACE_EVENT_TYPE_EXTENDED_DELTA:
            timestamp += event_header->time_delta;
            timestamp += (uint64_t)event_header->excess_length << 27ULL;
            continue;
         case TRACE_EVENT_TYPE_TIMESTAMP:
            timestamp = event_header->time_delta;
            timestamp |= (uint64_t)event_header->excess_length << 27ULL;
            continue;
         default:
            break;
         }

         timestamp += event_header->time_delta;

         /* If type_len is not one of the special types and not zero, it is
          * the data length / 4. */
         size_t length;
         struct trace_event_common *event;
         if (event_header->type_len) {
            length = event_header->type_len * 4 + 4;
            /* The length variable already contains event data in this case.
             */
            event = (struct trace_event_common *)&event_header->excess_length;
         } else {
            length = event_header->excess_length + 4;
            event = (struct trace_event_common *)read_ptr;
         }

         if (event->type == device->memory_trace.ftrace_update_ptes_id)
            evaluate_trace_event(device, timestamp, &device->vk.memory_trace_data.tokens,
                                 (struct trace_event_amdgpu_vm_update_ptes *)event);

         read_ptr += length - sizeof(struct trace_event_header);
      }
   } while (true);

   free(page);
}

static void
close_pipe_fds(struct radv_device *device)
{
   for (uint32_t i = 0; i < device->memory_trace.num_cpus; ++i) {
      close(device->memory_trace.pipe_fds[i]);
   }
}

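/* Sets up RMV memory tracing: checks that the amd_rmv ftrace instance exists,
 * determines the CPU count from /proc/cpuinfo, switches the trace clock to
 * "mono", opens the per-CPU raw trace pipes and enables the
 * amdgpu_vm_update_ptes event. */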
void
radv_memory_trace_init(struct radv_device *device)
{
#ifndef _WIN32
   DIR *dir = opendir(RADV_FTRACE_INSTANCE_PATH);
   if (!dir) {
      fprintf(stderr,
              "radv: Couldn't initialize memory tracing: "
              "Can't access the tracing instance directory (%s)\n",
              strerror(errno));
      goto error;
   }
   closedir(dir);

   device->memory_trace.num_cpus = 0;

   char cpuinfo_line[1024];
   FILE *cpuinfo_file = fopen("/proc/cpuinfo", "r");
   uint32_t num_physical_cores;
   while (fgets(cpuinfo_line, sizeof(cpuinfo_line), cpuinfo_file)) {
      char *logical_core_string = strstr(cpuinfo_line, "siblings");
      if (logical_core_string)
         sscanf(logical_core_string, "siblings : %d", &device->memory_trace.num_cpus);
      char *physical_core_string = strstr(cpuinfo_line, "cpu cores");
      if (physical_core_string)
         sscanf(physical_core_string, "cpu cores : %d", &num_physical_cores);
   }
   if (!device->memory_trace.num_cpus)
      device->memory_trace.num_cpus = num_physical_cores;
   fclose(cpuinfo_file);

   FILE *clock_file = fopen(RADV_FTRACE_INSTANCE_PATH "/trace_clock", "w");
   if (!clock_file) {
      fprintf(stderr,
              "radv: Couldn't initialize memory tracing: "
              "Can't access the tracing control files (%s).\n",
              strerror(errno));
      goto error;
   }

   fprintf(clock_file, "mono");
   fclose(clock_file);

   device->memory_trace.pipe_fds = malloc(device->memory_trace.num_cpus * sizeof(int));

   if (!device->memory_trace.pipe_fds) {
      device->memory_trace.num_cpus = 0;
   }
   for (uint32_t i = 0; i < device->memory_trace.num_cpus; ++i) {
      open_trace_pipe(i, device->memory_trace.pipe_fds + i);

      if (device->memory_trace.pipe_fds[i] == -1) {
         fprintf(stderr,
                 "radv: Couldn't initialize memory tracing: "
                 "Can't access the trace buffer pipes (%s).\n",
                 strerror(errno));
         for (i -= 1; i < device->memory_trace.num_cpus; --i) {
            close(device->memory_trace.pipe_fds[i]);
         }
         goto error;
      }
   }

   device->memory_trace.ftrace_update_ptes_id = trace_event_id("amdgpu_vm_update_ptes");
   if (device->memory_trace.ftrace_update_ptes_id == (uint16_t)~0U) {
      fprintf(stderr,
              "radv: Couldn't initialize memory tracing: "
              "Can't access the trace event ID file (%s).\n",
              strerror(errno));
      goto error_pipes;
   }

   if (!set_event_tracing_enabled("amdgpu_vm_update_ptes", true)) {
      fprintf(stderr,
              "radv: Couldn't initialize memory tracing: "
              "Can't enable trace events (%s).\n",
              strerror(errno));
      goto error_pipes;
   }

   fprintf(stderr, "radv: Enabled Memory Trace.\n");
   return;

error_pipes:
   close_pipe_fds(device);
error:
   vk_memory_trace_finish(&device->vk);
#endif
}

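/* Fills in the size and physical base address of one RMV memory location
 * (visible VRAM, invisible VRAM or host memory) from the GPU info. */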
static void
fill_memory_info(const struct radeon_info *gpu_info, struct vk_rmv_memory_info *out_info, int32_t index)
{
   switch (index) {
   case VK_RMV_MEMORY_LOCATION_DEVICE:
      out_info->physical_base_address = 0;
      out_info->size = gpu_info->all_vram_visible ? (uint64_t)gpu_info->vram_size_kb * 1024ULL
                                                  : (uint64_t)gpu_info->vram_vis_size_kb * 1024ULL;
      break;
   case VK_RMV_MEMORY_LOCATION_DEVICE_INVISIBLE:
      out_info->physical_base_address = (uint64_t)gpu_info->vram_vis_size_kb * 1024ULL;
      out_info->size = gpu_info->all_vram_visible ? 0 : (uint64_t)gpu_info->vram_size_kb * 1024ULL;
      break;
   case VK_RMV_MEMORY_LOCATION_HOST: {
      uint64_t ram_size = -1U;
      os_get_total_physical_memory(&ram_size);
      out_info->physical_base_address = 0;
      out_info->size = MIN2((uint64_t)gpu_info->gart_size_kb * 1024ULL, ram_size);
   } break;
   default:
      unreachable("invalid memory index");
   }
}

static enum vk_rmv_memory_type
memory_type_from_vram_type(uint32_t vram_type)
{
   switch (vram_type) {
   case AMD_VRAM_TYPE_UNKNOWN:
      return VK_RMV_MEMORY_TYPE_UNKNOWN;
   case AMD_VRAM_TYPE_DDR2:
      return VK_RMV_MEMORY_TYPE_DDR2;
   case AMD_VRAM_TYPE_DDR3:
      return VK_RMV_MEMORY_TYPE_DDR3;
   case AMD_VRAM_TYPE_DDR4:
      return VK_RMV_MEMORY_TYPE_DDR4;
   case AMD_VRAM_TYPE_GDDR5:
      return VK_RMV_MEMORY_TYPE_GDDR5;
   case AMD_VRAM_TYPE_HBM:
      return VK_RMV_MEMORY_TYPE_HBM;
   case AMD_VRAM_TYPE_GDDR6:
      return VK_RMV_MEMORY_TYPE_GDDR6;
   case AMD_VRAM_TYPE_DDR5:
      return VK_RMV_MEMORY_TYPE_DDR5;
   case AMD_VRAM_TYPE_LPDDR4:
      return VK_RMV_MEMORY_TYPE_LPDDR4;
   case AMD_VRAM_TYPE_LPDDR5:
      return VK_RMV_MEMORY_TYPE_LPDDR5;
   default:
      unreachable("Invalid vram type");
   }
}

void
radv_rmv_fill_device_info(const struct radv_physical_device *pdev, struct vk_rmv_device_info *info)
{
   const struct radeon_info *gpu_info = &pdev->info;

   for (int32_t i = 0; i < VK_RMV_MEMORY_LOCATION_COUNT; ++i) {
      fill_memory_info(gpu_info, &info->memory_infos[i], i);
   }

   if (gpu_info->marketing_name)
      strncpy(info->device_name, gpu_info->marketing_name, sizeof(info->device_name) - 1);
   info->pcie_family_id = gpu_info->family_id;
   info->pcie_revision_id = gpu_info->pci_rev_id;
   info->pcie_device_id = gpu_info->pci.dev;
   info->minimum_shader_clock = 0;
   info->maximum_shader_clock = gpu_info->max_gpu_freq_mhz;
   info->vram_type = memory_type_from_vram_type(gpu_info->vram_type);
   info->vram_bus_width = gpu_info->memory_bus_width;
   info->vram_operations_per_clock = ac_memory_ops_per_clock(gpu_info->vram_type);
   info->minimum_memory_clock = 0;
   info->maximum_memory_clock = gpu_info->memory_freq_mhz;
   info->vram_bandwidth = gpu_info->memory_bandwidth_gbps;
}

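/* Drains every per-CPU trace pipe into the RMV token stream. Within this file
 * it is called with the token lock already held. */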
void
radv_rmv_collect_trace_events(struct radv_device *device)
{
   for (uint32_t i = 0; i < device->memory_trace.num_cpus; ++i) {
      append_trace_events(device, device->memory_trace.pipe_fds[i]);
   }
}

void
radv_memory_trace_finish(struct radv_device *device)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   set_event_tracing_enabled("amdgpu_vm_update_ptes", false);
   close_pipe_fds(device);
}

/* The token lock must be held when entering _locked functions */
static void
log_resource_bind_locked(struct radv_device *device, uint64_t resource, struct radeon_winsys_bo *bo, uint64_t offset,
                         uint64_t size)
{
   struct vk_rmv_resource_bind_token token = {0};
   token.address = bo->va + offset;
   token.is_system_memory = bo->initial_domain & RADEON_DOMAIN_GTT;
   token.size = size;
   token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, resource);

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_BIND, &token);
}

void
radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool is_internal,
                         VkMemoryAllocateFlags alloc_flags)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VK_FROM_HANDLE(radv_device_memory, memory, heap);

   /* Do not log zero-sized device memory objects. */
   if (!memory->alloc_size)
      return;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);

   struct vk_rmv_resource_create_token token = {0};
   token.is_driver_internal = is_internal;
   token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)heap);
   token.type = VK_RMV_RESOURCE_TYPE_HEAP;
   token.heap.alignment = pdev->info.max_alignment;
   token.heap.size = memory->alloc_size;
   token.heap.heap_index = memory->heap_index;
   token.heap.alloc_flags = alloc_flags;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &token);
   log_resource_bind_locked(device, (uint64_t)heap, memory->bo, 0, memory->alloc_size);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo, bool is_internal)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   if (!device->vk.memory_trace_data.is_enabled)
      return;

   /* RMV doesn't seem to support GDS/OA domains. */
   if (!(bo->initial_domain & RADEON_DOMAIN_VRAM_GTT))
      return;

   struct vk_rmv_virtual_allocate_token token = {0};
   token.address = bo->va;
   /* If all VRAM is visible, no bo will be in invisible memory. */
   token.is_in_invisible_vram = bo->vram_no_cpu_access && !pdev->info.all_vram_visible;
   token.preferred_domains = (enum vk_rmv_kernel_memory_domain)bo->initial_domain;
   token.is_driver_internal = is_internal;
   token.page_count = DIV_ROUND_UP(bo->size, 4096);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_VIRTUAL_ALLOCATE, &token);
   radv_rmv_collect_trace_events(device);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_bo_destroy(struct radv_device *device, struct radeon_winsys_bo *bo)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   /* RMV doesn't seem to support GDS/OA domains. */
   if (!(bo->initial_domain & RADEON_DOMAIN_VRAM_GTT))
      return;

   struct vk_rmv_virtual_free_token token = {0};
   token.address = bo->va;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_VIRTUAL_FREE, &token);
   radv_rmv_collect_trace_events(device);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_buffer_bind(struct radv_device *device, VkBuffer _buffer)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VK_FROM_HANDLE(radv_buffer, buffer, _buffer);
   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   log_resource_bind_locked(device, (uint64_t)_buffer, buffer->bo, buffer->offset, buffer->vk.size);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_image_create(struct radv_device *device, const VkImageCreateInfo *create_info, bool is_internal,
                          VkImage _image)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VK_FROM_HANDLE(radv_image, image, _image);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token token = {0};
   token.is_driver_internal = is_internal;
   token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_image);
   token.type = VK_RMV_RESOURCE_TYPE_IMAGE;
   token.image.create_flags = create_info->flags;
   token.image.usage_flags = create_info->usage;
   token.image.type = create_info->imageType;
   token.image.extent = create_info->extent;
   token.image.format = create_info->format;
   token.image.num_mips = create_info->mipLevels;
   token.image.num_slices = create_info->arrayLayers;
   token.image.tiling = create_info->tiling;
   token.image.alignment_log2 = util_logbase2(image->alignment);
   token.image.log2_samples = util_logbase2(image->vk.samples);
   token.image.log2_storage_samples = util_logbase2(image->vk.samples);
   token.image.metadata_alignment_log2 = image->planes[0].surface.meta_alignment_log2;
   token.image.image_alignment_log2 = image->planes[0].surface.alignment_log2;
   token.image.size = image->size;
   token.image.metadata_size = image->planes[0].surface.meta_size;
   token.image.metadata_header_size = 0;
   token.image.metadata_offset = image->planes[0].surface.meta_offset;
   token.image.metadata_header_offset = image->planes[0].surface.meta_offset;
   token.image.presentable = image->planes[0].surface.is_displayable;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &token);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_image_bind(struct radv_device *device, uint32_t bind_idx, VkImage _image)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VK_FROM_HANDLE(radv_image, image, _image);
   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   log_resource_bind_locked(device, (uint64_t)_image, image->bindings[bind_idx].bo, image->bindings[bind_idx].offset,
                            image->bindings[bind_idx].range);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_query_pool_create(struct radv_device *device, VkQueryPool _pool)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VK_FROM_HANDLE(radv_query_pool, pool, _pool);

   if (pool->vk.query_type != VK_QUERY_TYPE_OCCLUSION && pool->vk.query_type != VK_QUERY_TYPE_PIPELINE_STATISTICS &&
       pool->vk.query_type != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT)
      return;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pool);
   create_token.type = VK_RMV_RESOURCE_TYPE_QUERY_HEAP;
   create_token.query_pool.type = pool->vk.query_type;
   create_token.query_pool.has_cpu_access = true;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   log_resource_bind_locked(device, (uint64_t)_pool, pool->bo, 0, pool->size);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_command_buffer_bo_create(struct radv_device *device, struct radeon_winsys_bo *bo, uint32_t executable_size,
                                      uint32_t data_size, uint32_t scratch_size)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   uint64_t upload_resource_identifier = (uint64_t)(uintptr_t)bo;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = true;
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, upload_resource_identifier);
   create_token.type = VK_RMV_RESOURCE_TYPE_COMMAND_ALLOCATOR;
   create_token.command_buffer.preferred_domain = (enum vk_rmv_kernel_memory_domain)device->ws->cs_domain(device->ws);
   create_token.command_buffer.executable_size = executable_size;
   create_token.command_buffer.app_available_executable_size = executable_size;
   create_token.command_buffer.embedded_data_size = data_size;
   create_token.command_buffer.app_available_embedded_data_size = data_size;
   create_token.command_buffer.scratch_size = scratch_size;
   create_token.command_buffer.app_available_scratch_size = scratch_size;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   log_resource_bind_locked(device, upload_resource_identifier, bo, 0, bo->size);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_log_cpu_map(&device->vk, bo->va, false);
}

void
radv_rmv_log_command_buffer_bo_destroy(struct radv_device *device, struct radeon_winsys_bo *bo)
{
   radv_rmv_log_resource_destroy(device, (uint64_t)(uintptr_t)bo);
   vk_rmv_log_cpu_map(&device->vk, bo->va, true);
}

void
radv_rmv_log_border_color_palette_create(struct radv_device *device, struct radeon_winsys_bo *bo)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   uint32_t resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)(uintptr_t)bo);

   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = true;
   create_token.resource_id = resource_id;
   create_token.type = VK_RMV_RESOURCE_TYPE_BORDER_COLOR_PALETTE;
   /*
    * We have 4096 entries, but the corresponding RMV token only has 8 bits.
    */
   create_token.border_color_palette.num_entries = 255; /* = RADV_BORDER_COLOR_COUNT; */

   struct vk_rmv_resource_bind_token bind_token;
   bind_token.address = bo->va;
   bind_token.is_system_memory = false;
   bind_token.resource_id = resource_id;
   bind_token.size = RADV_BORDER_COLOR_BUFFER_SIZE;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_BIND, &bind_token);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_log_cpu_map(&device->vk, bo->va, false);
}

void
radv_rmv_log_border_color_palette_destroy(struct radv_device *device, struct radeon_winsys_bo *bo)
{
   radv_rmv_log_resource_destroy(device, (uint64_t)(uintptr_t)bo);
   vk_rmv_log_cpu_map(&device->vk, bo->va, true);
}

void
radv_rmv_log_sparse_add_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, uint64_t offset)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   struct vk_rmv_resource_reference_token token = {0};
   token.virtual_address = src_bo->va + offset;
   token.residency_removed = false;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_REFERENCE, &token);
   radv_rmv_collect_trace_events(device);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_sparse_remove_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, uint64_t offset)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   struct vk_rmv_resource_reference_token token = {0};
   token.virtual_address = src_bo->va + offset;
   token.residency_removed = true;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_REFERENCE, &token);
   radv_rmv_collect_trace_events(device);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_descriptor_pool_create(struct radv_device *device, const VkDescriptorPoolCreateInfo *create_info,
                                    VkDescriptorPool _pool)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VK_FROM_HANDLE(radv_descriptor_pool, pool, _pool);

   if (pool->bo)
      vk_rmv_log_cpu_map(&device->vk, pool->bo->va, false);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = false;
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pool);
   create_token.type = VK_RMV_RESOURCE_TYPE_DESCRIPTOR_POOL;
   create_token.descriptor_pool.max_sets = create_info->maxSets;
   create_token.descriptor_pool.pool_size_count = create_info->poolSizeCount;
   /* Using vk_rmv_token_pool_alloc frees the allocation automatically when the trace is done. */
   create_token.descriptor_pool.pool_sizes = malloc(create_info->poolSizeCount * sizeof(VkDescriptorPoolSize));
   if (!create_token.descriptor_pool.pool_sizes) {
      simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
      return;
   }

   memcpy(create_token.descriptor_pool.pool_sizes, create_info->pPoolSizes,
          create_info->poolSizeCount * sizeof(VkDescriptorPoolSize));

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);

   if (pool->bo) {
      simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
      struct vk_rmv_resource_bind_token bind_token;
      bind_token.address = pool->bo->va;
      bind_token.is_system_memory = false;
      bind_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pool);
      bind_token.size = pool->size;

      vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_BIND, &bind_token);
      simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
   }
}

void
radv_rmv_log_graphics_pipeline_create(struct radv_device *device, struct radv_pipeline *pipeline, bool is_internal)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VkPipeline _pipeline = radv_pipeline_to_handle(pipeline);
   struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = is_internal;
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pipeline);
   create_token.type = VK_RMV_RESOURCE_TYPE_PIPELINE;
   create_token.pipeline.is_internal = is_internal;
   create_token.pipeline.hash_lo = pipeline->pipeline_hash;
   create_token.pipeline.is_ngg = graphics_pipeline->is_ngg;
   create_token.pipeline.shader_stages = graphics_pipeline->active_stages;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
      struct radv_shader *shader = pipeline->shaders[s];

      if (!shader)
         continue;

      log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset, shader->alloc->size);
   }
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_compute_pipeline_create(struct radv_device *device, struct radv_pipeline *pipeline, bool is_internal)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VkPipeline _pipeline = radv_pipeline_to_handle(pipeline);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = is_internal;
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pipeline);
   create_token.type = VK_RMV_RESOURCE_TYPE_PIPELINE;
   create_token.pipeline.is_internal = is_internal;
   create_token.pipeline.hash_lo = pipeline->pipeline_hash;
   create_token.pipeline.is_ngg = false;
   create_token.pipeline.shader_stages = VK_SHADER_STAGE_COMPUTE_BIT;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   struct radv_shader *shader = pipeline->shaders[MESA_SHADER_COMPUTE];
   log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset, shader->alloc->size);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_rt_pipeline_create(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VkPipeline _pipeline = radv_pipeline_to_handle(&pipeline->base.base);

   struct radv_shader *prolog = pipeline->prolog;
   struct radv_shader *traversal = pipeline->base.base.shaders[MESA_SHADER_INTERSECTION];

   VkShaderStageFlagBits active_stages = traversal ? VK_SHADER_STAGE_INTERSECTION_BIT_KHR : 0;
   if (prolog)
      active_stages |= VK_SHADER_STAGE_COMPUTE_BIT;

   for (uint32_t i = 0; i < pipeline->stage_count; i++) {
      if (pipeline->stages[i].shader)
         active_stages |= mesa_to_vk_shader_stage(pipeline->stages[i].stage);
   }

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);

   struct vk_rmv_resource_create_token create_token = {0};
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pipeline);
   create_token.type = VK_RMV_RESOURCE_TYPE_PIPELINE;
   create_token.pipeline.hash_lo = pipeline->base.base.pipeline_hash;
   create_token.pipeline.shader_stages = active_stages;
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);

   if (prolog)
      log_resource_bind_locked(device, (uint64_t)_pipeline, prolog->bo, prolog->alloc->offset, prolog->alloc->size);

   if (traversal)
      log_resource_bind_locked(device, (uint64_t)_pipeline, traversal->bo, traversal->alloc->offset,
                               traversal->alloc->size);

   for (uint32_t i = 0; i < pipeline->non_imported_stage_count; i++) {
      struct radv_shader *shader = pipeline->stages[i].shader;
      if (shader)
         log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset, shader->alloc->size);
   }

   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_event_create(struct radv_device *device, VkEvent _event, VkEventCreateFlags flags, bool is_internal)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VK_FROM_HANDLE(radv_event, event, _event);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = is_internal;
   create_token.type = VK_RMV_RESOURCE_TYPE_GPU_EVENT;
   create_token.event.flags = flags;
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_event);

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   log_resource_bind_locked(device, (uint64_t)_event, event->bo, 0, 8);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);

   if (event->map)
      vk_rmv_log_cpu_map(&device->vk, event->bo->va, false);
}

void
radv_rmv_log_submit(struct radv_device *device, enum amd_ip_type type)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   switch (type) {
   case AMD_IP_GFX:
      vk_rmv_log_misc_token(&device->vk, VK_RMV_MISC_EVENT_TYPE_SUBMIT_GRAPHICS);
      break;
   case AMD_IP_COMPUTE:
      vk_rmv_log_misc_token(&device->vk, VK_RMV_MISC_EVENT_TYPE_SUBMIT_COMPUTE);
      break;
   case AMD_IP_SDMA:
      vk_rmv_log_misc_token(&device->vk, VK_RMV_MISC_EVENT_TYPE_SUBMIT_COPY);
      break;
   default:
      unreachable("invalid ip type");
   }
}

void
radv_rmv_log_resource_destroy(struct radv_device *device, uint64_t handle)
{
   if (!device->vk.memory_trace_data.is_enabled || handle == 0)
      return;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_destroy_token token = {0};
   token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, handle);

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_DESTROY, &token);
   vk_rmv_destroy_resource_id_locked(&device->vk, handle);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}
940