/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based on amdgpu winsys.
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdio.h>

#include "radv_amdgpu_bo.h"
#include "radv_debug.h"

#include <amdgpu.h>
#include <inttypes.h>
#include <pthread.h>
#include <unistd.h>
#include "drm-uapi/amdgpu_drm.h"

#include "util/os_time.h"
#include "util/u_atomic.h"
#include "util/u_math.h"
#include "util/u_memory.h"

static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo);

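/* Execute a VM operation (map/unmap/clear/replace) on a VA range, translating
 * the RADV BO flags into AMDGPU VM page flags. */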
static int
radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, amdgpu_bo_handle bo, uint64_t offset, uint64_t size, uint64_t addr,
                     uint32_t bo_flags, uint64_t internal_flags, uint32_t ops)
{
   uint64_t flags = internal_flags;
   if (bo) {
      flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_EXECUTABLE;

      if ((bo_flags & RADEON_FLAG_VA_UNCACHED) && ws->info.gfx_level >= GFX9)
         flags |= AMDGPU_VM_MTYPE_UC;

      if (!(bo_flags & RADEON_FLAG_READ_ONLY))
         flags |= AMDGPU_VM_PAGE_WRITEABLE;
   }

   size = align64(size, getpagesize());

   return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr, flags, ops);
}

static int
bo_comparator(const void *ap, const void *bp)
{
   struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
   struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
   return (a > b) ? 1 : (a < b) ? -1 : 0;
}

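/* Rebuild the sorted, deduplicated list of backing BOs referenced by the
 * ranges of a virtual BO. */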
static VkResult
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
   u_rwlock_wrlock(&bo->lock);

   if (bo->bo_capacity < bo->range_count) {
      uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
      struct radv_amdgpu_winsys_bo **bos = realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
      if (!bos) {
         u_rwlock_wrunlock(&bo->lock);
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      }
      bo->bos = bos;
      bo->bo_capacity = new_count;
   }

   uint32_t temp_bo_count = 0;
   for (uint32_t i = 0; i < bo->range_count; ++i)
      if (bo->ranges[i].bo)
         bo->bos[temp_bo_count++] = bo->ranges[i].bo;

   qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);

   if (!temp_bo_count) {
      bo->bo_count = 0;
   } else {
      uint32_t final_bo_count = 1;
      for (uint32_t i = 1; i < temp_bo_count; ++i)
         if (bo->bos[i] != bo->bos[i - 1])
            bo->bos[final_bo_count++] = bo->bos[i];

      bo->bo_count = final_bo_count;
   }

   u_rwlock_wrunlock(&bo->lock);
   return VK_SUCCESS;
}

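/* Bind (or unbind, when _bo is NULL) a range of a virtual BO to a backing BO
 * and update the parent's range list, merging/splitting ranges as needed. */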
static VkResult
radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_winsys_bo *_parent, uint64_t offset,
                                   uint64_t size, struct radeon_winsys_bo *_bo, uint64_t bo_offset)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
   struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
   int range_count_delta, new_idx;
   int first = 0, last;
   struct radv_amdgpu_map_range new_first, new_last;
   VkResult result;
   int r;

   assert(parent->is_virtual);
   assert(!bo || !bo->is_virtual);

   /* When the BO is NULL, AMDGPU will reset the PTE VA range to the initial state. Otherwise, it
    * will first unmap all existing VA that overlap the requested range and then map.
    */
   if (bo) {
      r = radv_amdgpu_bo_va_op(ws, bo->bo, bo_offset, size, parent->base.va + offset, 0, 0, AMDGPU_VA_OP_REPLACE);
   } else {
      r =
         radv_amdgpu_bo_va_op(ws, NULL, 0, size, parent->base.va + offset, 0, AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_REPLACE);
   }

   if (r) {
      fprintf(stderr, "radv/amdgpu: Failed to replace a PRT VA region (%d).\n", r);
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
   }

   /* Do not add the BO to the virtual BO list if it's already in the global list to avoid dangling
    * BO references because it might have been destroyed without being previously unbound. Resetting
    * it to NULL clears the old BO ranges if present.
    *
    * This is going to be clarified in the Vulkan spec:
    * https://gitlab.khronos.org/vulkan/vulkan/-/issues/3125
    *
    * The issue still exists for non-global BOs but it will be addressed later, once we are 100% sure
    * it's RADV's fault (mostly because the solution looks more complicated).
    */
   if (bo && radv_buffer_is_resident(&bo->base)) {
      bo = NULL;
      bo_offset = 0;
   }

   /* We have at most 2 new ranges (1 by the bind, and another one by splitting a range that
    * contains the newly bound range). */
   if (parent->range_capacity - parent->range_count < 2) {
      uint32_t range_capacity = parent->range_capacity + 2;
      struct radv_amdgpu_map_range *ranges =
         realloc(parent->ranges, range_capacity * sizeof(struct radv_amdgpu_map_range));
      if (!ranges)
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      parent->ranges = ranges;
      parent->range_capacity = range_capacity;
   }

   /*
    * [first, last] is exactly the set of ranges that either overlap the
    * newly bound range, or are adjacent to it. This corresponds to the bind
    * ranges that may change.
    */
   while (first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
      ++first;

   last = first;
   while (last + 1 < parent->range_count && parent->ranges[last + 1].offset <= offset + size)
      ++last;

   /* Whether the first or last range is going to be totally removed or just
    * resized/left alone. Note that in the case of first == last, we will split
    * this into a part before and after the new range. The remove flag then
    * indicates whether to skip creating the corresponding split part. */
   bool remove_first = parent->ranges[first].offset == offset;
   bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;

   assert(parent->ranges[first].offset <= offset);
   assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);

   /* Try to merge the new range with the first range. */
   if (parent->ranges[first].bo == bo &&
       (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
      size += offset - parent->ranges[first].offset;
      offset = parent->ranges[first].offset;
      bo_offset = parent->ranges[first].bo_offset;
      remove_first = true;
   }

   /* Try to merge the new range with the last range. */
   if (parent->ranges[last].bo == bo &&
       (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
      size = parent->ranges[last].offset + parent->ranges[last].size - offset;
      remove_last = true;
   }

   range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
   new_idx = first + !remove_first;

   /* If the first/last ranges are not left alone, we unmap them and optionally map
    * them again after modifications. Note that this implicitly handles the splitting
    * if first == last. */
   new_first = parent->ranges[first];
   new_last = parent->ranges[last];

   if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
      if (!remove_first) {
         new_first.size = offset - new_first.offset;
      }
   }

   if (parent->ranges[last].offset < offset + size || remove_last) {
      if (!remove_last) {
         new_last.size -= offset + size - new_last.offset;
         new_last.bo_offset += (offset + size - new_last.offset);
         new_last.offset = offset + size;
      }
   }

   /* Moves the range list after last to account for the changed number of ranges. */
   memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
           sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));

   if (!remove_first)
      parent->ranges[first] = new_first;

   if (!remove_last)
      parent->ranges[new_idx + 1] = new_last;

   /* Actually set up the new range. */
   parent->ranges[new_idx].offset = offset;
   parent->ranges[new_idx].size = size;
   parent->ranges[new_idx].bo = bo;
   parent->ranges[new_idx].bo_offset = bo_offset;

   parent->range_count += range_count_delta;

   result = radv_amdgpu_winsys_rebuild_bo_list(parent);
   if (result != VK_SUCCESS)
      return result;

   return VK_SUCCESS;
}

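/* Append a creation/destruction record to the winsys BO log (only when BO
 * logging is enabled). */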
static void
radv_amdgpu_log_bo(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo, bool destroyed)
{
   struct radv_amdgpu_winsys_bo_log *bo_log = NULL;

   if (!ws->debug_log_bos)
      return;

   bo_log = malloc(sizeof(*bo_log));
   if (!bo_log)
      return;

   bo_log->va = bo->base.va;
   bo_log->size = bo->size;
   bo_log->timestamp = os_time_get_nano();
   bo_log->is_virtual = bo->is_virtual;
   bo_log->destroyed = destroyed;

   u_rwlock_wrlock(&ws->log_bo_list_lock);
   list_addtail(&bo_log->list, &ws->log_bo_list);
   u_rwlock_wrunlock(&ws->log_bo_list_lock);
}

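/* Add a BO to the global BO list, growing the array as needed. The list is
 * used for residency and for the allbos debug option. */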
static VkResult
radv_amdgpu_global_bo_list_add(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo)
{
   u_rwlock_wrlock(&ws->global_bo_list.lock);
   if (ws->global_bo_list.count == ws->global_bo_list.capacity) {
      unsigned capacity = MAX2(4, ws->global_bo_list.capacity * 2);
      void *data = realloc(ws->global_bo_list.bos, capacity * sizeof(struct radv_amdgpu_winsys_bo *));
      if (!data) {
         u_rwlock_wrunlock(&ws->global_bo_list.lock);
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      }

      ws->global_bo_list.bos = (struct radv_amdgpu_winsys_bo **)data;
      ws->global_bo_list.capacity = capacity;
   }

   ws->global_bo_list.bos[ws->global_bo_list.count++] = bo;
   bo->base.use_global_list = true;
   u_rwlock_wrunlock(&ws->global_bo_list.lock);
   return VK_SUCCESS;
}

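/* Remove a BO from the global BO list (swap with the last entry). */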
static void
radv_amdgpu_global_bo_list_del(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo)
{
   u_rwlock_wrlock(&ws->global_bo_list.lock);
   for (unsigned i = ws->global_bo_list.count; i-- > 0;) {
      if (ws->global_bo_list.bos[i] == bo) {
         ws->global_bo_list.bos[i] = ws->global_bo_list.bos[ws->global_bo_list.count - 1];
         --ws->global_bo_list.count;
         bo->base.use_global_list = false;
         break;
      }
   }
   u_rwlock_wrunlock(&ws->global_bo_list.lock);
}

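/* Destroy a BO: clear or unmap its VA range, free the kernel BO for
 * non-virtual BOs, and update the memory accounting. */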
static void
radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);

   radv_amdgpu_log_bo(ws, bo, true);

   if (bo->is_virtual) {
      int r;

      /* Clear mappings of this PRT VA region. */
      r = radv_amdgpu_bo_va_op(ws, NULL, 0, bo->size, bo->base.va, 0, 0, AMDGPU_VA_OP_CLEAR);
      if (r) {
         fprintf(stderr, "radv/amdgpu: Failed to clear a PRT VA region (%d).\n", r);
      }

      free(bo->bos);
      free(bo->ranges);
      u_rwlock_destroy(&bo->lock);
   } else {
      if (ws->debug_all_bos)
         radv_amdgpu_global_bo_list_del(ws, bo);
      radv_amdgpu_bo_va_op(ws, bo->bo, 0, bo->size, bo->base.va, 0, 0, AMDGPU_VA_OP_UNMAP);
      amdgpu_bo_free(bo->bo);
   }

   if (bo->base.initial_domain & RADEON_DOMAIN_VRAM) {
      if (bo->base.vram_no_cpu_access) {
         p_atomic_add(&ws->allocated_vram, -align64(bo->size, ws->info.gart_page_size));
      } else {
         p_atomic_add(&ws->allocated_vram_vis, -align64(bo->size, ws->info.gart_page_size));
      }
   }

   if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt, -align64(bo->size, ws->info.gart_page_size));

   amdgpu_va_range_free(bo->va_handle);
   FREE(bo);
}

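/* Allocate a new BO: reserve a VA range, then either set up a PRT region
 * (virtual BOs) or allocate a kernel BO with the requested domains/flags and
 * map it. */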
static VkResult
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned alignment,
                             enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags, unsigned priority,
                             uint64_t replay_address, struct radeon_winsys_bo **out_bo)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo;
   struct amdgpu_bo_alloc_request request = {0};
   struct radv_amdgpu_map_range *ranges = NULL;
   amdgpu_bo_handle buf_handle;
   uint64_t va = 0;
   amdgpu_va_handle va_handle;
   int r;
   VkResult result = VK_SUCCESS;

   /* Just be robust for callers that might use NULL-ness for determining if things should be freed.
    */
   *out_bo = NULL;

   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo) {
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   unsigned virt_alignment = alignment;
   if (size >= ws->info.pte_fragment_size)
      virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);

   assert(!replay_address || (flags & RADEON_FLAG_REPLAYABLE));

   const uint64_t va_flags = AMDGPU_VA_RANGE_HIGH | (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
                             (flags & RADEON_FLAG_REPLAYABLE ? AMDGPU_VA_RANGE_REPLAYABLE : 0);
   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, virt_alignment, replay_address, &va,
                             &va_handle, va_flags);
   if (r) {
      result = replay_address ? VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS : VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto error_va_alloc;
   }

   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->size = size;
   bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);

   if (flags & RADEON_FLAG_VIRTUAL) {
      ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
      if (!ranges) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto error_ranges_alloc;
      }

      u_rwlock_init(&bo->lock);

      bo->ranges = ranges;
      bo->range_count = 1;
      bo->range_capacity = 1;

      bo->ranges[0].offset = 0;
      bo->ranges[0].size = size;
      bo->ranges[0].bo = NULL;
      bo->ranges[0].bo_offset = 0;

      /* Reserve a PRT VA region. */
      r = radv_amdgpu_bo_va_op(ws, NULL, 0, size, bo->base.va, 0, AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_MAP);
      if (r) {
         fprintf(stderr, "radv/amdgpu: Failed to reserve a PRT VA region (%d).\n", r);
         result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
         goto error_ranges_alloc;
      }

      radv_amdgpu_log_bo(ws, bo, false);

      *out_bo = (struct radeon_winsys_bo *)bo;
      return VK_SUCCESS;
   }

   request.alloc_size = size;
   request.phys_alignment = alignment;

   if (initial_domain & RADEON_DOMAIN_VRAM) {
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;

      /* Since VRAM and GTT have almost the same performance on
       * APUs, we could just set GTT. However, in order to decrease
       * GTT(RAM) usage, which is shared with the OS, allow VRAM
       * placements too. The idea is not to use VRAM usefully, but
       * to use it so that it's not unused and wasted.
       *
       * Furthermore, even on discrete GPUs this is beneficial. If
       * both GTT and VRAM are set then AMDGPU still prefers VRAM
       * for the initial placement, but it makes the buffers
       * spillable. Otherwise AMDGPU tries to place the buffers in
       * VRAM really hard to the extent that we are getting a lot
       * of unnecessary movement. This helps significantly when
       * e.g. Horizon Zero Dawn allocates more memory than we have
       * VRAM.
       */
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
   }

   if (initial_domain & RADEON_DOMAIN_GTT)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
   if (initial_domain & RADEON_DOMAIN_GDS)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
   if (initial_domain & RADEON_DOMAIN_OA)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;

   if (flags & RADEON_FLAG_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS) {
      bo->base.vram_no_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
      request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
   }
   if (flags & RADEON_FLAG_GTT_WC)
      request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
   if (!(flags & RADEON_FLAG_IMPLICIT_SYNC))
      request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
   if ((initial_domain & RADEON_DOMAIN_VRAM_GTT) && (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING) &&
       ((ws->perftest & RADV_PERFTEST_LOCAL_BOS) || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
      bo->base.is_local = true;
      request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
   }

   if (initial_domain & RADEON_DOMAIN_VRAM) {
      if (ws->zero_all_vram_allocs || (flags & RADEON_FLAG_ZERO_VRAM))
         request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
   }

   if (flags & RADEON_FLAG_DISCARDABLE && ws->info.drm_minor >= 47)
      request.flags |= AMDGPU_GEM_CREATE_DISCARDABLE;

   r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
   if (r) {
      fprintf(stderr, "radv/amdgpu: Failed to allocate a buffer:\n");
      fprintf(stderr, "radv/amdgpu: size : %" PRIu64 " bytes\n", size);
      fprintf(stderr, "radv/amdgpu: alignment : %u bytes\n", alignment);
      fprintf(stderr, "radv/amdgpu: domains : %u\n", initial_domain);
      result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto error_bo_alloc;
   }

   r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags, 0, AMDGPU_VA_OP_MAP);
   if (r) {
      result = VK_ERROR_UNKNOWN;
      goto error_va_map;
   }

   bo->bo = buf_handle;
   bo->base.initial_domain = initial_domain;
   bo->base.use_global_list = false;
   bo->priority = priority;

   r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   if (initial_domain & RADEON_DOMAIN_VRAM) {
      /* Buffers allocated in VRAM with the NO_CPU_ACCESS flag
       * aren't mappable and they are counted as part of the VRAM
       * counter.
       *
       * Otherwise, buffers with the CPU_ACCESS flag or without
       * either flag (imported buffers) are counted as part of the
       * VRAM visible counter because they can be mapped.
       */
      if (bo->base.vram_no_cpu_access) {
         p_atomic_add(&ws->allocated_vram, align64(bo->size, ws->info.gart_page_size));
      } else {
         p_atomic_add(&ws->allocated_vram_vis, align64(bo->size, ws->info.gart_page_size));
      }
   }

   if (initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt, align64(bo->size, ws->info.gart_page_size));

   if (ws->debug_all_bos)
      radv_amdgpu_global_bo_list_add(ws, bo);
   radv_amdgpu_log_bo(ws, bo, false);

   *out_bo = (struct radeon_winsys_bo *)bo;
   return VK_SUCCESS;
error_va_map:
   amdgpu_bo_free(buf_handle);

error_bo_alloc:
   free(ranges);

error_ranges_alloc:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   FREE(bo);
   return result;
}

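/* Map a BO into the CPU address space; returns NULL on failure. */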
static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   int ret;
   void *data;
   ret = amdgpu_bo_cpu_map(bo->bo, &data);
   if (ret)
      return NULL;
   return data;
}

static void
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   amdgpu_bo_cpu_unmap(bo->bo);
}

static uint64_t
radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws, uint64_t size, unsigned alignment)
{
   uint64_t vm_alignment = alignment;

   /* Increase the VM alignment for faster address translation. */
   if (size >= ws->info.pte_fragment_size)
      vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);

   /* Gfx9: Increase the VM alignment to the most significant bit set
    * in the size for faster address translation.
    */
   if (ws->info.gfx_level >= GFX9) {
      unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
      uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;

      vm_alignment = MAX2(vm_alignment, msb_alignment);
   }
   return vm_alignment;
}

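/* Import a user pointer as a GTT BO and map it into the GPU address space. */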
static VkResult
radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws, void *pointer, uint64_t size, unsigned priority,
                               struct radeon_winsys_bo **out_bo)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   amdgpu_bo_handle buf_handle;
   struct radv_amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;
   uint64_t vm_alignment;
   VkResult result = VK_SUCCESS;
   int ret;

   /* Just be robust for callers that might use NULL-ness for determining if things should be freed.
    */
   *out_bo = NULL;

   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   ret = amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle);
   if (ret) {
      if (ret == -EINVAL) {
         result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
      } else {
         result = VK_ERROR_UNKNOWN;
      }
      goto error;
   }

   /* Using the optimal VM alignment also fixes GPU hangs for buffers that
    * are imported.
    */
   vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size, ws->info.gart_page_size);

   if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, vm_alignment, 0, &va, &va_handle,
                             AMDGPU_VA_RANGE_HIGH)) {
      result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto error_va_alloc;
   }

   if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP)) {
      result = VK_ERROR_UNKNOWN;
      goto error_va_map;
   }

   /* Initialize it */
   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->size = size;
   bo->bo = buf_handle;
   bo->base.initial_domain = RADEON_DOMAIN_GTT;
   bo->base.use_global_list = false;
   bo->priority = priority;

   ASSERTED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   p_atomic_add(&ws->allocated_gtt, align64(bo->size, ws->info.gart_page_size));

   if (ws->debug_all_bos)
      radv_amdgpu_global_bo_list_add(ws, bo);
   radv_amdgpu_log_bo(ws, bo, false);

   *out_bo = (struct radeon_winsys_bo *)bo;
   return VK_SUCCESS;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error:
   FREE(bo);
   return result;
}

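/* Import a BO from a dma-buf file descriptor, map it and account it against
 * the heap(s) reported by the kernel. */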
static VkResult
radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, int fd, unsigned priority, struct radeon_winsys_bo **out_bo,
                              uint64_t *alloc_size)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;
   enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
   struct amdgpu_bo_import_result result;
   struct amdgpu_bo_info info;
   enum radeon_bo_domain initial = 0;
   int r;
   VkResult vk_result = VK_SUCCESS;

   /* Just be robust for callers that might use NULL-ness for determining if things should be freed.
    */
   *out_bo = NULL;

   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   r = amdgpu_bo_import(ws->dev, type, fd, &result);
   if (r) {
      vk_result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
      goto error;
   }

   r = amdgpu_bo_query_info(result.buf_handle, &info);
   if (r) {
      vk_result = VK_ERROR_UNKNOWN;
      goto error_query;
   }

   if (alloc_size) {
      *alloc_size = info.alloc_size;
   }

   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, result.alloc_size, 1 << 20, 0, &va, &va_handle,
                             AMDGPU_VA_RANGE_HIGH);
   if (r) {
      vk_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto error_query;
   }

   r = radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size, va, 0, 0, AMDGPU_VA_OP_MAP);
   if (r) {
      vk_result = VK_ERROR_UNKNOWN;
      goto error_va_map;
   }

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      initial |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      initial |= RADEON_DOMAIN_GTT;

   bo->bo = result.buf_handle;
   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->base.initial_domain = initial;
   bo->base.use_global_list = false;
   bo->size = result.alloc_size;
   bo->priority = priority;

   r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   if (bo->base.initial_domain & RADEON_DOMAIN_VRAM)
      p_atomic_add(&ws->allocated_vram, align64(bo->size, ws->info.gart_page_size));
   if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt, align64(bo->size, ws->info.gart_page_size));

   if (ws->debug_all_bos)
      radv_amdgpu_global_bo_list_add(ws, bo);
   radv_amdgpu_log_bo(ws, bo, false);

   *out_bo = (struct radeon_winsys_bo *)bo;
   return VK_SUCCESS;
error_va_map:
   amdgpu_va_range_free(va_handle);

error_query:
   amdgpu_bo_free(result.buf_handle);

error:
   FREE(bo);
   return vk_result;
}

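/* Export a BO as a dma-buf file descriptor. */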
static bool
radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo, int *fd)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
   int r;
   unsigned handle;
   r = amdgpu_bo_export(bo->bo, type, &handle);
   if (r)
      return false;

   *fd = (int)handle;
   return true;
}

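/* Query the domains and allocation flags of a BO referenced by a dma-buf fd
 * and translate them back into radeon_winsys enums. */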
static bool
radv_amdgpu_bo_get_flags_from_fd(struct radeon_winsys *_ws, int fd, enum radeon_bo_domain *domains,
                                 enum radeon_bo_flag *flags)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct amdgpu_bo_import_result result = {0};
   struct amdgpu_bo_info info = {0};
   int r;

   *domains = 0;
   *flags = 0;

   r = amdgpu_bo_import(ws->dev, amdgpu_bo_handle_type_dma_buf_fd, fd, &result);
   if (r)
      return false;

   r = amdgpu_bo_query_info(result.buf_handle, &info);
   amdgpu_bo_free(result.buf_handle);
   if (r)
      return false;

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      *domains |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      *domains |= RADEON_DOMAIN_GTT;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GDS)
      *domains |= RADEON_DOMAIN_GDS;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_OA)
      *domains |= RADEON_DOMAIN_OA;

   if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
      *flags |= RADEON_FLAG_CPU_ACCESS;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
      *flags |= RADEON_FLAG_NO_CPU_ACCESS;
   if (!(info.alloc_flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
      *flags |= RADEON_FLAG_IMPLICIT_SYNC;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
      *flags |= RADEON_FLAG_GTT_WC;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
      *flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_PREFER_LOCAL_BO;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
      *flags |= RADEON_FLAG_ZERO_VRAM;
   return true;
}

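/* Decode the hardware TILE_SPLIT field into a tile split size in bytes;
 * radv_eg_tile_split_rev below performs the inverse conversion. */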
static unsigned
eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0:
      tile_split = 64;
      break;
   case 1:
      tile_split = 128;
      break;
   case 2:
      tile_split = 256;
      break;
   case 3:
      tile_split = 512;
      break;
   default:
   case 4:
      tile_split = 1024;
      break;
   case 5:
      tile_split = 2048;
      break;
   case 6:
      tile_split = 4096;
      break;
   }
   return tile_split;
}

static unsigned
radv_eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64:
      return 0;
   case 128:
      return 1;
   case 256:
      return 2;
   case 512:
      return 3;
   default:
   case 1024:
      return 4;
   case 2048:
      return 5;
   case 4096:
      return 6;
   }
}

#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45
#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK 0x3

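/* Pack the layout described by radeon_bo_metadata into AMDGPU tiling flags and
 * attach them, together with the opaque UMD metadata, to the BO. */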
static void
radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   struct amdgpu_bo_metadata metadata = {0};
   uint64_t tiling_flags = 0;

   if (ws->info.gfx_level >= GFX9) {
      tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
      tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, md->u.gfx9.dcc_offset_256b);
      tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, md->u.gfx9.dcc_pitch_max);
      tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, md->u.gfx9.dcc_independent_64b_blocks);
      tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, md->u.gfx9.dcc_independent_128b_blocks);
      tiling_flags |= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, md->u.gfx9.dcc_max_compressed_block_size);
      tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
   } else {
      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
      else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
      else
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

      tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
      tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
      tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
      if (md->u.legacy.tile_split)
         tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
      tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
      tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks) - 1);

      if (md->u.legacy.scanout)
         tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
      else
         tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
   }

   metadata.tiling_info = tiling_flags;
   metadata.size_metadata = md->size_metadata;
   memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

   amdgpu_bo_set_metadata(bo->bo, &metadata);
}

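/* Query the tiling flags and UMD metadata of a BO and unpack them into
 * radeon_bo_metadata. */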
static void
radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   struct amdgpu_bo_info info = {0};

   int r = amdgpu_bo_query_info(bo->bo, &info);
   if (r)
      return;

   uint64_t tiling_flags = info.metadata.tiling_info;

   if (ws->info.gfx_level >= GFX9) {
      md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
      md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
   } else {
      md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
      md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;

      if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
         md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
      else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
         md->u.legacy.microtile = RADEON_LAYOUT_TILED;

      md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
      md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
      md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
      md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
      md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
      md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
      md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
   }

   md->size_metadata = info.metadata.size_metadata;
   memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

static VkResult
radv_amdgpu_winsys_bo_make_resident(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo, bool resident)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   VkResult result = VK_SUCCESS;

   /* Do not add the BO to the global list if it's a local BO because the
    * kernel maintains a list for us.
    */
   if (bo->base.is_local)
      return VK_SUCCESS;

   /* Do not add the BO twice to the global list if the allbos debug
    * option is enabled.
    */
   if (ws->debug_all_bos)
      return VK_SUCCESS;

   if (resident) {
      result = radv_amdgpu_global_bo_list_add(ws, bo);
   } else {
      radv_amdgpu_global_bo_list_del(ws, bo);
   }

   return result;
}

static int
radv_amdgpu_bo_va_compare(const void *a, const void *b)
{
   const struct radv_amdgpu_winsys_bo *bo_a = *(const struct radv_amdgpu_winsys_bo *const *)a;
   const struct radv_amdgpu_winsys_bo *bo_b = *(const struct radv_amdgpu_winsys_bo *const *)b;
   return bo_a->base.va < bo_b->base.va ? -1 : bo_a->base.va > bo_b->base.va ? 1 : 0;
}

static uint64_t
radv_amdgpu_canonicalize_va(uint64_t va)
{
   /* Would be less hardcoded to use addr32_hi (0xffff8000) to generate a mask,
    * but there are confusing differences between page fault reports from the kernel,
    * which seems to report the top 48 bits, while addr32_hi has 47 bits. */
   return va & ((1ull << 48) - 1);
}

static void
radv_amdgpu_dump_bo_log(struct radeon_winsys *_ws, FILE *file)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo_log *bo_log;

   if (!ws->debug_log_bos)
      return;

   u_rwlock_rdlock(&ws->log_bo_list_lock);
   LIST_FOR_EACH_ENTRY (bo_log, &ws->log_bo_list, list) {
      fprintf(file, "timestamp=%llu, VA=%.16llx-%.16llx, destroyed=%d, is_virtual=%d\n", (long long)bo_log->timestamp,
              (long long)radv_amdgpu_canonicalize_va(bo_log->va),
              (long long)radv_amdgpu_canonicalize_va(bo_log->va + bo_log->size), bo_log->destroyed, bo_log->is_virtual);
   }
   u_rwlock_rdunlock(&ws->log_bo_list_lock);
}

static void
radv_amdgpu_dump_bo_ranges(struct radeon_winsys *_ws, FILE *file)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   if (ws->debug_all_bos) {
      struct radv_amdgpu_winsys_bo **bos = NULL;
      int i = 0;

      u_rwlock_rdlock(&ws->global_bo_list.lock);
      bos = malloc(sizeof(*bos) * ws->global_bo_list.count);
      if (!bos) {
         u_rwlock_rdunlock(&ws->global_bo_list.lock);
         fprintf(file, " Failed to allocate memory to sort VA ranges for dumping\n");
         return;
      }

      for (i = 0; i < ws->global_bo_list.count; i++) {
         bos[i] = ws->global_bo_list.bos[i];
      }
      qsort(bos, ws->global_bo_list.count, sizeof(bos[0]), radv_amdgpu_bo_va_compare);

      for (i = 0; i < ws->global_bo_list.count; ++i) {
         fprintf(file, " VA=%.16llx-%.16llx, handle=%d\n", (long long)radv_amdgpu_canonicalize_va(bos[i]->base.va),
                 (long long)radv_amdgpu_canonicalize_va(bos[i]->base.va + bos[i]->size), bos[i]->bo_handle);
      }
      free(bos);
      u_rwlock_rdunlock(&ws->global_bo_list.lock);
   } else
      fprintf(file, " To get BO VA ranges, please specify RADV_DEBUG=allbos\n");
}

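/* Plug the BO entry points into the winsys function table. */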
void
radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
   ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
   ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
   ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
   ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
   ws->base.buffer_from_ptr = radv_amdgpu_winsys_bo_from_ptr;
   ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
   ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
   ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
   ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
   ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
   ws->base.buffer_get_flags_from_fd = radv_amdgpu_bo_get_flags_from_fd;
   ws->base.buffer_make_resident = radv_amdgpu_winsys_bo_make_resident;
   ws->base.dump_bo_ranges = radv_amdgpu_dump_bo_ranges;
   ws->base.dump_bo_log = radv_amdgpu_dump_bo_log;
}