/*
 * Copyright © 2022 Friedrich Vock
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef _WIN32
#include <dirent.h>
#include <unistd.h>
#endif
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include "ac_gpu_info.h"
#include "radv_private.h"

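/* Dedicated ftrace instance used for RMV memory tracing. Reading and writing
 * it requires tracefs to be mounted and accessible to the process, which
 * typically means running as root or with adjusted tracefs permissions. */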
#define RADV_FTRACE_INSTANCE_PATH "/sys/kernel/tracing/instances/amd_rmv"

static FILE *
open_event_file(const char *event_name, const char *event_filename, const char *mode)
{
   char filename[2048];
   snprintf(filename, sizeof(filename), RADV_FTRACE_INSTANCE_PATH "/events/amdgpu/%s/%s", event_name, event_filename);
   return fopen(filename, mode);
}

static bool
set_event_tracing_enabled(const char *event_name, bool enabled)
{
   FILE *file = open_event_file(event_name, "enable", "w");
   if (!file)
      return false;

   size_t written_bytes = fwrite(enabled ? "1" : "0", 1, 1, file);
   fclose(file);
   return written_bytes == 1;
}

static uint16_t
trace_event_id(const char *event_name)
{
   /* id is 16-bit, so <= 65535: up to 5 digits plus a newline, plus a NUL
    * terminator so strtoul never reads past the buffer. */
   char data[7] = {0};

   FILE *file = open_event_file(event_name, "id", "r");
   if (!file)
      return (uint16_t)~0;

   size_t read_bytes = fread(data, 1, 6, file);
   fclose(file);

   if (!read_bytes)
      return (uint16_t)~0;

   return (uint16_t)strtoul(data, NULL, 10);
}

static void
open_trace_pipe(uint32_t cpu_index, int *dst_fd)
{
#ifdef _WIN32
   *dst_fd = -1;
#else
   char filename[2048];
   snprintf(filename, sizeof(filename), RADV_FTRACE_INSTANCE_PATH "/per_cpu/cpu%d/trace_pipe_raw", cpu_index);
   /* I/O to the pipe needs to be non-blocking, otherwise reading all available
    * data would block indefinitely by waiting for more data to be written to the pipe */
   *dst_fd = open(filename, O_RDONLY | O_NONBLOCK);
#endif
}

/*
 * Kernel trace buffer parsing
 */

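/* The structures below mirror the binary layout of the kernel ring buffer as
 * read from trace_pipe_raw (see include/linux/ring_buffer.h in the kernel
 * source); this is not a stable userspace ABI, so field sizes and order must
 * match what the running kernel writes. */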
struct trace_page_header {
   uint64_t timestamp;
   int32_t commit;
};

enum trace_event_type { TRACE_EVENT_TYPE_PADDING = 29, TRACE_EVENT_TYPE_EXTENDED_DELTA, TRACE_EVENT_TYPE_TIMESTAMP };

struct trace_event_header {
   uint32_t type_len : 5;
   uint32_t time_delta : 27;
   /* Only present if length is too big for type_len */
   uint32_t excess_length;
};

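/* Fields shared by every ftrace event record; event-specific payloads such as
 * amdgpu_vm_update_ptes below embed these as their first member. */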
struct trace_event_common {
   unsigned short type;
   unsigned char flags;
   unsigned char preempt_count;
   int pid;
};

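/* Payload of the amdgpu_vm_update_ptes trace event. In the ring buffer it is
 * immediately followed by a trace_event_address_array holding the physical
 * addresses of the updated PTEs. */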
struct trace_event_amdgpu_vm_update_ptes {
   struct trace_event_common common;
   uint64_t start;
   uint64_t end;
   uint64_t flags;
   unsigned int num_ptes;
   uint64_t incr;
   int pid;
   uint64_t vm_ctx;
};

/* Represents a dynamic array of addresses in the ftrace buffer. */
struct trace_event_address_array {
   uint16_t data_size;
   uint16_t reserved;
   char data[];
};

/* Possible flags for PTEs, taken from amdgpu_vm.h */
#define AMDGPU_PTE_VALID  (1ULL << 0)
#define AMDGPU_PTE_SYSTEM (1ULL << 1)
#define AMDGPU_PTE_PRT    (1ULL << 51)

/* The minimum size of a GPU page */
#define MIN_GPU_PAGE_SIZE 4096

static void
emit_page_table_update_event(struct vk_memory_trace_data *data, bool is_apu, uint64_t timestamp,
                             struct trace_event_amdgpu_vm_update_ptes *event, uint64_t *addrs, unsigned int pte_index)
{
   struct vk_rmv_token token;

   uint64_t end_addr;
   /* There may be more updated PTEs than the ones reported in the ftrace buffer.
    * We choose the reported end virtual address here to report the correct total committed memory. */
   if (pte_index == event->num_ptes - 1)
      end_addr = event->end;
   else
      end_addr = event->start + (pte_index + 1) * (event->incr / MIN_GPU_PAGE_SIZE);
   uint64_t start_addr = event->start + pte_index * (event->incr / MIN_GPU_PAGE_SIZE);

   token.type = VK_RMV_TOKEN_TYPE_PAGE_TABLE_UPDATE;
   token.timestamp = timestamp;
   token.data.page_table_update.type = VK_RMV_PAGE_TABLE_UPDATE_TYPE_UPDATE;
   token.data.page_table_update.page_size = event->incr;
   token.data.page_table_update.page_count = (end_addr - start_addr) * MIN_GPU_PAGE_SIZE / event->incr;
   token.data.page_table_update.pid = event->common.pid;
   token.data.page_table_update.virtual_address = event->start * MIN_GPU_PAGE_SIZE + pte_index * event->incr;
   /* RMV expects mappings to system memory to have a physical address of 0.
    * Even with traces generated by AMDGPU-PRO, on APUs without dedicated VRAM everything seems to
    * be marked as "committed to system memory". */
   token.data.page_table_update.physical_address = event->flags & AMDGPU_PTE_SYSTEM || is_apu ? 0 : addrs[pte_index];

   token.data.page_table_update.is_unmap = !(event->flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT));
   util_dynarray_append(&data->tokens, struct vk_rmv_token, token);
}

static void
evaluate_trace_event(struct radv_device *device, uint64_t timestamp, struct util_dynarray *tokens,
                     struct trace_event_amdgpu_vm_update_ptes *event)
{
   if (event->common.pid != getpid() && event->pid != getpid()) {
      return;
   }

   struct trace_event_address_array *array = (struct trace_event_address_array *)(event + 1);

   for (uint32_t i = 0; i < event->num_ptes; ++i)
      emit_page_table_update_event(&device->vk.memory_trace_data, !device->physical_device->rad_info.has_dedicated_vram,
                                   timestamp, event, (uint64_t *)array->data, i);
}

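/* Drains all complete pages currently available from one per-CPU
 * trace_pipe_raw fd and appends RMV tokens for every amdgpu_vm_update_ptes
 * event found. Because the pipes are opened non-blocking, a read returns less
 * than a page header once no more data is available, which ends the loop. */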
static void
append_trace_events(struct radv_device *device, int pipe_fd)
{
   /* Assuming 4KB if os_get_page_size fails. */
   uint64_t page_size = 4096;
   os_get_page_size(&page_size);

   uint64_t timestamp;

   /*
    * Parse the trace ring buffer page by page.
    */
   char *page = (char *)malloc(page_size);
   if (!page) {
      return;
   }
   int64_t read_bytes;
   do {
      read_bytes = (int64_t)read(pipe_fd, page, page_size);
      if (read_bytes < (int64_t)sizeof(struct trace_page_header))
         break;

      struct trace_page_header *page_header = (struct trace_page_header *)page;
      timestamp = page_header->timestamp;

      size_t data_size = MIN2((size_t)read_bytes, (size_t)page_header->commit);

      char *read_ptr = page + sizeof(struct trace_page_header);
      while (read_ptr - page < data_size) {
         struct trace_event_header *event_header = (struct trace_event_header *)read_ptr;
         read_ptr += sizeof(struct trace_event_header);

         /* Handle special event type, see include/linux/ring_buffer.h in the
          * kernel source */
         switch (event_header->type_len) {
         case TRACE_EVENT_TYPE_PADDING:
            if (event_header->time_delta) {
               /* Specified size, skip past padding */
               read_ptr += event_header->excess_length;
               timestamp += event_header->time_delta;
               continue;
            } else {
               /* Padding is until end of page, skip until next page */
               read_ptr = page + data_size;
               continue;
            }
         case TRACE_EVENT_TYPE_EXTENDED_DELTA:
            timestamp += event_header->time_delta;
            timestamp += (uint64_t)event_header->excess_length << 27ULL;
            continue;
         case TRACE_EVENT_TYPE_TIMESTAMP:
            timestamp = event_header->time_delta;
            timestamp |= (uint64_t)event_header->excess_length << 27ULL;
            continue;
         default:
            break;
         }

         timestamp += event_header->time_delta;

         /* If type_len is not one of the special types and not zero, it is
          * the data length / 4. */
         size_t length;
         struct trace_event_common *event;
         if (event_header->type_len) {
            length = event_header->type_len * 4 + 4;
            /* In this case the excess_length field already holds event data
             * rather than a length. */
            event = (struct trace_event_common *)&event_header->excess_length;
         } else {
            length = event_header->excess_length + 4;
            event = (struct trace_event_common *)read_ptr;
         }

         if (event->type == device->memory_trace.ftrace_update_ptes_id)
            evaluate_trace_event(device, timestamp, &device->vk.memory_trace_data.tokens,
                                 (struct trace_event_amdgpu_vm_update_ptes *)event);

         read_ptr += length - sizeof(struct trace_event_header);
      }
   } while (true);

   free(page);
}

static void
close_pipe_fds(struct radv_device *device)
{
   for (uint32_t i = 0; i < device->memory_trace.num_cpus; ++i) {
      close(device->memory_trace.pipe_fds[i]);
   }
}

void
radv_memory_trace_init(struct radv_device *device)
{
#ifndef _WIN32
   DIR *dir = opendir(RADV_FTRACE_INSTANCE_PATH);
   if (!dir) {
      fprintf(stderr,
              "radv: Couldn't initialize memory tracing: "
              "Can't access the tracing instance directory (%s)\n",
              strerror(errno));
      goto error;
   }
   closedir(dir);

   device->memory_trace.num_cpus = 0;

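   /* One raw trace pipe is opened per logical CPU, so figure out how many
    * there are: parse /proc/cpuinfo for the "siblings" (logical core) count
    * and fall back to the physical core count if it is missing. */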
   char cpuinfo_line[1024];
   uint32_t num_physical_cores = 0;
   FILE *cpuinfo_file = fopen("/proc/cpuinfo", "r");
   if (cpuinfo_file) {
      while (fgets(cpuinfo_line, sizeof(cpuinfo_line), cpuinfo_file)) {
         char *logical_core_string = strstr(cpuinfo_line, "siblings");
         if (logical_core_string)
            sscanf(logical_core_string, "siblings : %d", &device->memory_trace.num_cpus);
         char *physical_core_string = strstr(cpuinfo_line, "cpu cores");
         if (physical_core_string)
            sscanf(physical_core_string, "cpu cores : %d", &num_physical_cores);
      }
      fclose(cpuinfo_file);
   }
   if (!device->memory_trace.num_cpus)
      device->memory_trace.num_cpus = num_physical_cores;

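   /* Use the monotonic clock ("mono") for trace timestamps so the kernel
    * events share a time base with the timestamps recorded for RMV tokens. */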
   FILE *clock_file = fopen(RADV_FTRACE_INSTANCE_PATH "/trace_clock", "w");
   if (!clock_file) {
      fprintf(stderr,
              "radv: Couldn't initialize memory tracing: "
              "Can't access the tracing control files (%s).\n",
              strerror(errno));
      goto error;
   }

   fprintf(clock_file, "mono");
   fclose(clock_file);

   device->memory_trace.pipe_fds = malloc(device->memory_trace.num_cpus * sizeof(int));

   if (!device->memory_trace.pipe_fds) {
      device->memory_trace.num_cpus = 0;
   }
   for (uint32_t i = 0; i < device->memory_trace.num_cpus; ++i) {
      open_trace_pipe(i, device->memory_trace.pipe_fds + i);

      if (device->memory_trace.pipe_fds[i] == -1) {
         fprintf(stderr,
                 "radv: Couldn't initialize memory tracing: "
                 "Can't access the trace buffer pipes (%s).\n",
                 strerror(errno));
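         /* Close the pipes opened so far; i is unsigned, so the loop below
          * terminates once it wraps past zero. */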
         for (i -= 1; i < device->memory_trace.num_cpus; --i) {
            close(device->memory_trace.pipe_fds[i]);
         }
         goto error;
      }
   }

   device->memory_trace.ftrace_update_ptes_id = trace_event_id("amdgpu_vm_update_ptes");
   if (device->memory_trace.ftrace_update_ptes_id == (uint16_t)~0U) {
      fprintf(stderr,
              "radv: Couldn't initialize memory tracing: "
              "Can't access the trace event ID file (%s).\n",
              strerror(errno));
      goto error_pipes;
   }

   if (!set_event_tracing_enabled("amdgpu_vm_update_ptes", true)) {
      fprintf(stderr,
              "radv: Couldn't initialize memory tracing: "
              "Can't enable trace events (%s).\n",
              strerror(errno));
      goto error_pipes;
   }

   fprintf(stderr, "radv: Enabled Memory Trace.\n");
   return;

error_pipes:
   close_pipe_fds(device);
error:
   vk_memory_trace_finish(&device->vk);
#endif
}

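/* Describes one RMV memory location (device-local visible, device-local
 * invisible, or host) in terms of its physical base address and size. */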
static void
fill_memory_info(const struct radeon_info *info, struct vk_rmv_memory_info *out_info, int32_t index)
{
   switch (index) {
   case VK_RMV_MEMORY_LOCATION_DEVICE:
      out_info->physical_base_address = 0;
      out_info->size =
         info->all_vram_visible ? (uint64_t)info->vram_size_kb * 1024ULL : (uint64_t)info->vram_vis_size_kb * 1024ULL;
      break;
   case VK_RMV_MEMORY_LOCATION_DEVICE_INVISIBLE:
      out_info->physical_base_address = (uint64_t)info->vram_vis_size_kb * 1024ULL;
      out_info->size = info->all_vram_visible ? 0 : (uint64_t)info->vram_size_kb * 1024ULL;
      break;
   case VK_RMV_MEMORY_LOCATION_HOST: {
      uint64_t ram_size = -1U;
      os_get_total_physical_memory(&ram_size);
      out_info->physical_base_address = 0;
      out_info->size = MIN2((uint64_t)info->gart_size_kb * 1024ULL, ram_size);
   } break;
   default:
      unreachable("invalid memory index");
   }
}

static enum vk_rmv_memory_type
memory_type_from_vram_type(uint32_t vram_type)
{
   switch (vram_type) {
   case AMD_VRAM_TYPE_UNKNOWN:
      return VK_RMV_MEMORY_TYPE_UNKNOWN;
   case AMD_VRAM_TYPE_DDR2:
      return VK_RMV_MEMORY_TYPE_DDR2;
   case AMD_VRAM_TYPE_DDR3:
      return VK_RMV_MEMORY_TYPE_DDR3;
   case AMD_VRAM_TYPE_DDR4:
      return VK_RMV_MEMORY_TYPE_DDR4;
   case AMD_VRAM_TYPE_GDDR5:
      return VK_RMV_MEMORY_TYPE_GDDR5;
   case AMD_VRAM_TYPE_HBM:
      return VK_RMV_MEMORY_TYPE_HBM;
   case AMD_VRAM_TYPE_GDDR6:
      return VK_RMV_MEMORY_TYPE_GDDR6;
   case AMD_VRAM_TYPE_DDR5:
      return VK_RMV_MEMORY_TYPE_DDR5;
   case AMD_VRAM_TYPE_LPDDR4:
      return VK_RMV_MEMORY_TYPE_LPDDR4;
   case AMD_VRAM_TYPE_LPDDR5:
      return VK_RMV_MEMORY_TYPE_LPDDR5;
   default:
      unreachable("Invalid vram type");
   }
}

void
radv_rmv_fill_device_info(const struct radv_physical_device *device, struct vk_rmv_device_info *info)
{
   const struct radeon_info *rad_info = &device->rad_info;

   for (int32_t i = 0; i < VK_RMV_MEMORY_LOCATION_COUNT; ++i) {
      fill_memory_info(rad_info, &info->memory_infos[i], i);
   }

   if (rad_info->marketing_name)
      strncpy(info->device_name, rad_info->marketing_name, sizeof(info->device_name) - 1);
   info->pcie_family_id = rad_info->family_id;
   info->pcie_revision_id = rad_info->pci_rev_id;
   info->pcie_device_id = rad_info->pci.dev;
   info->minimum_shader_clock = 0;
   info->maximum_shader_clock = rad_info->max_gpu_freq_mhz;
   info->vram_type = memory_type_from_vram_type(rad_info->vram_type);
   info->vram_bus_width = rad_info->memory_bus_width;
   info->vram_operations_per_clock = ac_memory_ops_per_clock(rad_info->vram_type);
   info->minimum_memory_clock = 0;
   info->maximum_memory_clock = rad_info->memory_freq_mhz;
   info->vram_bandwidth = rad_info->memory_bandwidth_gbps;
}

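/* Reads any pending kernel trace data from all per-CPU pipes and converts it
 * into RMV tokens. */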
void
radv_rmv_collect_trace_events(struct radv_device *device)
{
   for (uint32_t i = 0; i < device->memory_trace.num_cpus; ++i) {
      append_trace_events(device, device->memory_trace.pipe_fds[i]);
   }
}

void
radv_memory_trace_finish(struct radv_device *device)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   set_event_tracing_enabled("amdgpu_vm_update_ptes", false);
   close_pipe_fds(device);
}

/* The token lock must be held when entering _locked functions */
static void
log_resource_bind_locked(struct radv_device *device, uint64_t resource, struct radeon_winsys_bo *bo, uint64_t offset,
                         uint64_t size)
{
   struct vk_rmv_resource_bind_token token = {0};
   token.address = bo->va + offset;
   token.is_system_memory = bo->initial_domain & RADEON_DOMAIN_GTT;
   token.size = size;
   token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, resource);

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_BIND, &token);
}

void
radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool is_internal,
                         VkMemoryAllocateFlags alloc_flags)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   RADV_FROM_HANDLE(radv_device_memory, memory, heap);

   /* Do not log zero-sized device memory objects. */
   if (!memory->alloc_size)
      return;

   radv_rmv_log_bo_allocate(device, memory->bo, memory->alloc_size, false);
   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);

   struct vk_rmv_resource_create_token token = {0};
   token.is_driver_internal = is_internal;
   token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)heap);
   token.type = VK_RMV_RESOURCE_TYPE_HEAP;
   token.heap.alignment = device->physical_device->rad_info.max_alignment;
   token.heap.size = memory->alloc_size;
   token.heap.heap_index = memory->heap_index;
   token.heap.alloc_flags = alloc_flags;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &token);
   log_resource_bind_locked(device, (uint64_t)heap, memory->bo, 0, memory->alloc_size);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo, uint32_t size, bool is_internal)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   struct vk_rmv_virtual_allocate_token token = {0};
   token.address = bo->va;
   /* If all VRAM is visible, no bo will be in invisible memory. */
   token.is_in_invisible_vram = bo->vram_no_cpu_access && !device->physical_device->rad_info.all_vram_visible;
   token.preferred_domains = (enum vk_rmv_kernel_memory_domain)bo->initial_domain;
   token.is_driver_internal = is_internal;
   token.page_count = DIV_ROUND_UP(size, 4096);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_VIRTUAL_ALLOCATE, &token);
   radv_rmv_collect_trace_events(device);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_bo_destroy(struct radv_device *device, struct radeon_winsys_bo *bo)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   struct vk_rmv_virtual_free_token token = {0};
   token.address = bo->va;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_VIRTUAL_FREE, &token);
   radv_rmv_collect_trace_events(device);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_buffer_bind(struct radv_device *device, VkBuffer _buffer)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   log_resource_bind_locked(device, (uint64_t)_buffer, buffer->bo, buffer->offset, buffer->vk.size);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_image_create(struct radv_device *device, const VkImageCreateInfo *create_info, bool is_internal,
                          VkImage _image)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   RADV_FROM_HANDLE(radv_image, image, _image);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token token = {0};
   token.is_driver_internal = is_internal;
   token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_image);
   token.type = VK_RMV_RESOURCE_TYPE_IMAGE;
   token.image.create_flags = create_info->flags;
   token.image.usage_flags = create_info->usage;
   token.image.type = create_info->imageType;
   token.image.extent = create_info->extent;
   token.image.format = create_info->format;
   token.image.num_mips = create_info->mipLevels;
   token.image.num_slices = create_info->arrayLayers;
   token.image.tiling = create_info->tiling;
   token.image.alignment_log2 = util_logbase2(image->alignment);
   token.image.log2_samples = util_logbase2(image->vk.samples);
   token.image.log2_storage_samples = util_logbase2(image->vk.samples);
   token.image.metadata_alignment_log2 = image->planes[0].surface.meta_alignment_log2;
   token.image.image_alignment_log2 = image->planes[0].surface.alignment_log2;
   token.image.size = image->size;
   token.image.metadata_size = image->planes[0].surface.meta_size;
   token.image.metadata_header_size = 0;
   token.image.metadata_offset = image->planes[0].surface.meta_offset;
   token.image.metadata_header_offset = image->planes[0].surface.meta_offset;
   token.image.presentable = image->planes[0].surface.is_displayable;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &token);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_image_bind(struct radv_device *device, VkImage _image)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   RADV_FROM_HANDLE(radv_image, image, _image);
   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   log_resource_bind_locked(device, (uint64_t)_image, image->bindings[0].bo, image->bindings[0].offset, image->size);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_query_pool_create(struct radv_device *device, VkQueryPool _pool, bool is_internal)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   RADV_FROM_HANDLE(radv_query_pool, pool, _pool);

   if (pool->vk.query_type != VK_QUERY_TYPE_OCCLUSION && pool->vk.query_type != VK_QUERY_TYPE_PIPELINE_STATISTICS &&
       pool->vk.query_type != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT)
      return;

   radv_rmv_log_bo_allocate(device, pool->bo, pool->size, is_internal);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = is_internal;
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pool);
   create_token.type = VK_RMV_RESOURCE_TYPE_QUERY_HEAP;
   create_token.query_pool.type = pool->vk.query_type;
   create_token.query_pool.has_cpu_access = true;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   log_resource_bind_locked(device, (uint64_t)_pool, pool->bo, 0, pool->size);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_command_buffer_bo_create(struct radv_device *device, struct radeon_winsys_bo *bo, uint32_t executable_size,
                                      uint32_t data_size, uint32_t scratch_size)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   /* Only one of executable_size, data_size and scratch_size should be > 0 */
   /* TODO: Trace CS BOs for executable data */
   uint32_t size = MAX3(executable_size, data_size, scratch_size);

   radv_rmv_log_bo_allocate(device, bo, size, true);

   uint64_t upload_resource_identifier = (uint64_t)(uintptr_t)bo;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = true;
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, upload_resource_identifier);
   create_token.type = VK_RMV_RESOURCE_TYPE_COMMAND_ALLOCATOR;
   create_token.command_buffer.preferred_domain = (enum vk_rmv_kernel_memory_domain)device->ws->cs_domain(device->ws);
   create_token.command_buffer.executable_size = executable_size;
   create_token.command_buffer.app_available_executable_size = executable_size;
   create_token.command_buffer.embedded_data_size = data_size;
   create_token.command_buffer.app_available_embedded_data_size = data_size;
   create_token.command_buffer.scratch_size = scratch_size;
   create_token.command_buffer.app_available_scratch_size = scratch_size;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   log_resource_bind_locked(device, upload_resource_identifier, bo, 0, size);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_log_cpu_map(&device->vk, bo->va, false);
}

void
radv_rmv_log_command_buffer_bo_destroy(struct radv_device *device, struct radeon_winsys_bo *bo)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_destroy_token destroy_token = {0};
   destroy_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)(uintptr_t)bo);

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_DESTROY, &destroy_token);
   vk_rmv_destroy_resource_id_locked(&device->vk, (uint64_t)(uintptr_t)bo);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
   radv_rmv_log_bo_destroy(device, bo);
   vk_rmv_log_cpu_map(&device->vk, bo->va, true);
}

void
radv_rmv_log_border_color_palette_create(struct radv_device *device, struct radeon_winsys_bo *bo)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   radv_rmv_log_bo_allocate(device, bo, RADV_BORDER_COLOR_BUFFER_SIZE, true);
   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   uint32_t resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)(uintptr_t)bo);

   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = true;
   create_token.resource_id = resource_id;
   create_token.type = VK_RMV_RESOURCE_TYPE_BORDER_COLOR_PALETTE;
   /*
    * We have 4096 entries (RADV_BORDER_COLOR_COUNT), but the corresponding
    * RMV token field is only 8 bits wide, so clamp the reported count to 255.
    */
   create_token.border_color_palette.num_entries = 255;

   struct vk_rmv_resource_bind_token bind_token;
   bind_token.address = bo->va;
   bind_token.is_system_memory = false;
   bind_token.resource_id = resource_id;
   bind_token.size = RADV_BORDER_COLOR_BUFFER_SIZE;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_BIND, &bind_token);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_log_cpu_map(&device->vk, bo->va, false);
}

void
radv_rmv_log_border_color_palette_destroy(struct radv_device *device, struct radeon_winsys_bo *bo)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_destroy_token token = {0};
   /* same resource id as the create token */
   token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)(uintptr_t)bo);

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_DESTROY, &token);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_log_cpu_map(&device->vk, bo->va, true);
}

void
radv_rmv_log_sparse_add_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, uint64_t offset)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   struct vk_rmv_resource_reference_token token = {0};
   token.virtual_address = src_bo->va + offset;
   token.residency_removed = false;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_REFERENCE, &token);
   radv_rmv_collect_trace_events(device);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_sparse_remove_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, uint64_t offset)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   struct vk_rmv_resource_reference_token token = {0};
   token.virtual_address = src_bo->va + offset;
   token.residency_removed = true;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_REFERENCE, &token);
   radv_rmv_collect_trace_events(device);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_descriptor_pool_create(struct radv_device *device, const VkDescriptorPoolCreateInfo *create_info,
                                    VkDescriptorPool _pool, bool is_internal)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   RADV_FROM_HANDLE(radv_descriptor_pool, pool, _pool);

   if (pool->bo) {
      radv_rmv_log_bo_allocate(device, pool->bo, pool->size, is_internal);
      vk_rmv_log_cpu_map(&device->vk, pool->bo->va, false);
   }

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = false;
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pool);
   create_token.type = VK_RMV_RESOURCE_TYPE_DESCRIPTOR_POOL;
   create_token.descriptor_pool.max_sets = create_info->maxSets;
   create_token.descriptor_pool.pool_size_count = create_info->poolSizeCount;
   /* The trace data takes ownership of this allocation and frees it
    * automatically when the trace is done. */
   create_token.descriptor_pool.pool_sizes = malloc(create_info->poolSizeCount * sizeof(VkDescriptorPoolSize));
   if (!create_token.descriptor_pool.pool_sizes) {
      simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
      return;
   }

   memcpy(create_token.descriptor_pool.pool_sizes, create_info->pPoolSizes,
          create_info->poolSizeCount * sizeof(VkDescriptorPoolSize));

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);

   if (pool->bo) {
      simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
      struct vk_rmv_resource_bind_token bind_token;
      bind_token.address = pool->bo->va;
      bind_token.is_system_memory = false;
      bind_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pool);
      bind_token.size = pool->size;

      vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_BIND, &bind_token);
      simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
   }
}

void
radv_rmv_log_graphics_pipeline_create(struct radv_device *device, struct radv_pipeline *pipeline, bool is_internal)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VkPipeline _pipeline = radv_pipeline_to_handle(pipeline);
   struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = is_internal;
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pipeline);
   create_token.type = VK_RMV_RESOURCE_TYPE_PIPELINE;
   create_token.pipeline.is_internal = is_internal;
   create_token.pipeline.hash_lo = pipeline->pipeline_hash;
   create_token.pipeline.is_ngg = graphics_pipeline->is_ngg;
   create_token.pipeline.shader_stages = graphics_pipeline->active_stages;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
      struct radv_shader *shader = pipeline->shaders[s];

      if (!shader)
         continue;

      log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset, shader->alloc->size);
   }
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_compute_pipeline_create(struct radv_device *device, struct radv_pipeline *pipeline, bool is_internal)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VkPipeline _pipeline = radv_pipeline_to_handle(pipeline);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = is_internal;
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pipeline);
   create_token.type = VK_RMV_RESOURCE_TYPE_PIPELINE;
   create_token.pipeline.is_internal = is_internal;
   create_token.pipeline.hash_lo = pipeline->pipeline_hash;
   create_token.pipeline.is_ngg = false;
   create_token.pipeline.shader_stages = VK_SHADER_STAGE_COMPUTE_BIT;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   struct radv_shader *shader = pipeline->shaders[MESA_SHADER_COMPUTE];
   log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset, shader->alloc->size);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_rt_pipeline_create(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VkPipeline _pipeline = radv_pipeline_to_handle(&pipeline->base.base);

   struct radv_shader *prolog = pipeline->prolog;
   struct radv_shader *traversal = pipeline->base.base.shaders[MESA_SHADER_INTERSECTION];

   VkShaderStageFlagBits active_stages = traversal ? VK_SHADER_STAGE_INTERSECTION_BIT_KHR : 0;
   if (prolog)
      active_stages |= VK_SHADER_STAGE_COMPUTE_BIT;

   for (uint32_t i = 0; i < pipeline->stage_count; i++) {
      if (pipeline->stages[i].shader)
         active_stages |= mesa_to_vk_shader_stage(pipeline->stages[i].stage);
   }

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);

   struct vk_rmv_resource_create_token create_token = {0};
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pipeline);
   create_token.type = VK_RMV_RESOURCE_TYPE_PIPELINE;
   create_token.pipeline.hash_lo = pipeline->base.base.pipeline_hash;
   create_token.pipeline.shader_stages = active_stages;
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);

   if (prolog)
      log_resource_bind_locked(device, (uint64_t)_pipeline, prolog->bo, prolog->alloc->offset, prolog->alloc->size);

   if (traversal)
      log_resource_bind_locked(device, (uint64_t)_pipeline, traversal->bo, traversal->alloc->offset,
                               traversal->alloc->size);

   for (uint32_t i = 0; i < pipeline->non_imported_stage_count; i++) {
      struct radv_shader *shader = pipeline->stages[i].shader;
      if (shader)
         log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset, shader->alloc->size);
   }

   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_event_create(struct radv_device *device, VkEvent _event, VkEventCreateFlags flags, bool is_internal)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   RADV_FROM_HANDLE(radv_event, event, _event);

   radv_rmv_log_bo_allocate(device, event->bo, 8, is_internal);
   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = is_internal;
   create_token.type = VK_RMV_RESOURCE_TYPE_GPU_EVENT;
   create_token.event.flags = flags;
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_event);

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   log_resource_bind_locked(device, (uint64_t)_event, event->bo, 0, 8);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);

   if (event->map)
      vk_rmv_log_cpu_map(&device->vk, event->bo->va, false);
}

void
radv_rmv_log_submit(struct radv_device *device, enum amd_ip_type type)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   switch (type) {
   case AMD_IP_GFX:
      vk_rmv_log_misc_token(&device->vk, VK_RMV_MISC_EVENT_TYPE_SUBMIT_GRAPHICS);
      break;
   case AMD_IP_COMPUTE:
      vk_rmv_log_misc_token(&device->vk, VK_RMV_MISC_EVENT_TYPE_SUBMIT_COMPUTE);
      break;
   case AMD_IP_SDMA:
      vk_rmv_log_misc_token(&device->vk, VK_RMV_MISC_EVENT_TYPE_SUBMIT_COPY);
      break;
   default:
      unreachable("invalid ip type");
   }
}

void
radv_rmv_log_resource_destroy(struct radv_device *device, uint64_t handle)
{
   if (!device->vk.memory_trace_data.is_enabled || handle == 0)
      return;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_destroy_token token = {0};
   token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, handle);

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_DESTROY, &token);
   vk_rmv_destroy_resource_id_locked(&device->vk, handle);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}