/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based on amdgpu winsys.
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdio.h>

#include "radv_amdgpu_bo.h"
#include "radv_debug.h"

#include <amdgpu.h>
#include <inttypes.h>
#include <pthread.h>
#include <unistd.h>
#include "drm-uapi/amdgpu_drm.h"

#include "util/os_time.h"
#include "util/u_atomic.h"
#include "util/u_math.h"
#include "util/u_memory.h"

static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo);

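/* Thin wrapper around amdgpu_bo_va_op_raw(): when a BO is given, the VM page flags
 * are derived from the RADV BO flags; otherwise the caller-provided internal flags
 * (e.g. AMDGPU_VM_PAGE_PRT) are used as-is. The size is always page-aligned.
 */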
static int
radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, amdgpu_bo_handle bo, uint64_t offset, uint64_t size, uint64_t addr,
                     uint32_t bo_flags, uint64_t internal_flags, uint32_t ops)
{
   uint64_t flags = internal_flags;
   if (bo) {
      flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_EXECUTABLE;

      if ((bo_flags & RADEON_FLAG_VA_UNCACHED) && ws->info.gfx_level >= GFX9)
         flags |= AMDGPU_VM_MTYPE_UC;

      if (!(bo_flags & RADEON_FLAG_READ_ONLY))
         flags |= AMDGPU_VM_PAGE_WRITEABLE;
   }

   size = align64(size, getpagesize());

   return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr, flags, ops);
}

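/* Pointer-identity comparator: sorting by pointer makes duplicate backing BOs
 * adjacent so they can be dropped in a single pass.
 */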
static int
bo_comparator(const void *ap, const void *bp)
{
   struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
   struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
   return (a > b) ? 1 : (a < b) ? -1 : 0;
}

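/* Rebuild the sorted, deduplicated array of backing BOs referenced by the ranges
 * of a virtual BO, growing bo->bos as needed.
 */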
static VkResult
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
   u_rwlock_wrlock(&bo->lock);

   if (bo->bo_capacity < bo->range_count) {
      uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
      struct radv_amdgpu_winsys_bo **bos = realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
      if (!bos) {
         u_rwlock_wrunlock(&bo->lock);
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      }
      bo->bos = bos;
      bo->bo_capacity = new_count;
   }

   uint32_t temp_bo_count = 0;
   for (uint32_t i = 0; i < bo->range_count; ++i)
      if (bo->ranges[i].bo)
         bo->bos[temp_bo_count++] = bo->ranges[i].bo;

   qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);

   if (!temp_bo_count) {
      bo->bo_count = 0;
   } else {
      uint32_t final_bo_count = 1;
      for (uint32_t i = 1; i < temp_bo_count; ++i)
         if (bo->bos[i] != bo->bos[i - 1])
            bo->bos[final_bo_count++] = bo->bos[i];

      bo->bo_count = final_bo_count;
   }

   u_rwlock_wrunlock(&bo->lock);
   return VK_SUCCESS;
}

static VkResult
radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_winsys_bo *_parent, uint64_t offset,
                                   uint64_t size, struct radeon_winsys_bo *_bo, uint64_t bo_offset)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
   struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
   int range_count_delta, new_idx;
   int first = 0, last;
   struct radv_amdgpu_map_range new_first, new_last;
   VkResult result;
   int r;

   assert(parent->is_virtual);
   assert(!bo || !bo->is_virtual);

   /* When the BO is NULL, AMDGPU resets the PTE VA range to its initial state. Otherwise, it
    * first unmaps all existing VA mappings that overlap the requested range and then creates
    * the new mapping.
    */
   if (bo) {
      r = radv_amdgpu_bo_va_op(ws, bo->bo, bo_offset, size, parent->base.va + offset, 0, 0, AMDGPU_VA_OP_REPLACE);
   } else {
      r =
         radv_amdgpu_bo_va_op(ws, NULL, 0, size, parent->base.va + offset, 0, AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_REPLACE);
   }

   if (r) {
      fprintf(stderr, "radv/amdgpu: Failed to replace a PRT VA region (%d).\n", r);
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
   }

   /* Do not add the BO to the virtual BO list if it's already in the global list, to avoid dangling
    * BO references: it might have been destroyed without being unbound first. Resetting it to NULL
    * clears the old BO ranges if present.
    *
    * This is going to be clarified in the Vulkan spec:
    * https://gitlab.khronos.org/vulkan/vulkan/-/issues/3125
    *
    * The issue still exists for non-global BOs, but it will be addressed later, once we are 100%
    * sure it's RADV's fault (mostly because the solution looks more complicated).
    */
   if (bo && radv_buffer_is_resident(&bo->base)) {
      bo = NULL;
      bo_offset = 0;
   }

   /* We have at most 2 new ranges (1 by the bind, and another one by splitting a range that
    * contains the newly bound range). */
   if (parent->range_capacity - parent->range_count < 2) {
      uint32_t range_capacity = parent->range_capacity + 2;
      struct radv_amdgpu_map_range *ranges =
         realloc(parent->ranges, range_capacity * sizeof(struct radv_amdgpu_map_range));
      if (!ranges)
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      parent->ranges = ranges;
      parent->range_capacity = range_capacity;
   }

   /*
    * [first, last] is exactly the set of ranges that either overlap the
    * newly bound range or are adjacent to it. These are the bind ranges
    * that may change.
    */
   while (first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
      ++first;

   last = first;
   while (last + 1 < parent->range_count && parent->ranges[last + 1].offset <= offset + size)
      ++last;

   /* Whether the first or last range is going to be totally removed or just
    * resized/left alone. Note that when first == last, we split it into a part
    * before and a part after the new range. The remove flag then indicates
    * whether the corresponding split part should not be created. */
   bool remove_first = parent->ranges[first].offset == offset;
   bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;

   assert(parent->ranges[first].offset <= offset);
   assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);

   /* Try to merge the new range with the first range. */
   if (parent->ranges[first].bo == bo &&
       (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
      size += offset - parent->ranges[first].offset;
      offset = parent->ranges[first].offset;
      bo_offset = parent->ranges[first].bo_offset;
      remove_first = true;
   }

   /* Try to merge the new range with the last range. */
   if (parent->ranges[last].bo == bo &&
       (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
      size = parent->ranges[last].offset + parent->ranges[last].size - offset;
      remove_last = true;
   }

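   /* The (last - first + 1) existing ranges are replaced by the new range, plus an
    * optional head piece (when the first range is kept) and an optional tail piece
    * (when the last range is kept). The new range itself lands at new_idx, right
    * after the kept head piece, if any.
    */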
   range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
   new_idx = first + !remove_first;

   /* If the first/last range is not left alone, we unmap it and optionally map it
    * again after modifications. Note that this implicitly can do the splitting
    * if first == last. */
   new_first = parent->ranges[first];
   new_last = parent->ranges[last];

   if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
      if (!remove_first) {
         new_first.size = offset - new_first.offset;
      }
   }

   if (parent->ranges[last].offset < offset + size || remove_last) {
      if (!remove_last) {
         new_last.size -= offset + size - new_last.offset;
         new_last.bo_offset += (offset + size - new_last.offset);
         new_last.offset = offset + size;
      }
   }

   /* Moves the range list after last to account for the changed number of ranges. */
   memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
           sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));

   if (!remove_first)
      parent->ranges[first] = new_first;

   if (!remove_last)
      parent->ranges[new_idx + 1] = new_last;

   /* Actually set up the new range. */
   parent->ranges[new_idx].offset = offset;
   parent->ranges[new_idx].size = size;
   parent->ranges[new_idx].bo = bo;
   parent->ranges[new_idx].bo_offset = bo_offset;

   parent->range_count += range_count_delta;

   result = radv_amdgpu_winsys_rebuild_bo_list(parent);
   if (result != VK_SUCCESS)
      return result;

   return VK_SUCCESS;
}

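/* Record a BO creation/destruction event for the log dumped by radv_amdgpu_dump_bo_log(). */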
static void
radv_amdgpu_log_bo(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo, bool destroyed)
{
   struct radv_amdgpu_winsys_bo_log *bo_log = NULL;

   if (!ws->debug_log_bos)
      return;

   bo_log = malloc(sizeof(*bo_log));
   if (!bo_log)
      return;

   bo_log->va = bo->base.va;
   bo_log->size = bo->size;
   bo_log->timestamp = os_time_get_nano();
   bo_log->is_virtual = bo->is_virtual;
   bo_log->destroyed = destroyed;

   u_rwlock_wrlock(&ws->log_bo_list_lock);
   list_addtail(&bo_log->list, &ws->log_bo_list);
   u_rwlock_wrunlock(&ws->log_bo_list_lock);
}

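/* Helpers for the winsys-wide BO list: every BO is added when RADV_DEBUG=allbos is
 * set, otherwise only BOs made resident through buffer_make_resident() end up here.
 */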
static int
radv_amdgpu_global_bo_list_add(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo)
{
   u_rwlock_wrlock(&ws->global_bo_list.lock);
   if (ws->global_bo_list.count == ws->global_bo_list.capacity) {
      unsigned capacity = MAX2(4, ws->global_bo_list.capacity * 2);
      void *data = realloc(ws->global_bo_list.bos, capacity * sizeof(struct radv_amdgpu_winsys_bo *));
      if (!data) {
         u_rwlock_wrunlock(&ws->global_bo_list.lock);
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      }

      ws->global_bo_list.bos = (struct radv_amdgpu_winsys_bo **)data;
      ws->global_bo_list.capacity = capacity;
   }

   ws->global_bo_list.bos[ws->global_bo_list.count++] = bo;
   bo->base.use_global_list = true;
   u_rwlock_wrunlock(&ws->global_bo_list.lock);
   return VK_SUCCESS;
}

static void
radv_amdgpu_global_bo_list_del(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo)
{
   u_rwlock_wrlock(&ws->global_bo_list.lock);
   for (unsigned i = ws->global_bo_list.count; i-- > 0;) {
      if (ws->global_bo_list.bos[i] == bo) {
         ws->global_bo_list.bos[i] = ws->global_bo_list.bos[ws->global_bo_list.count - 1];
         --ws->global_bo_list.count;
         bo->base.use_global_list = false;
         break;
      }
   }
   u_rwlock_wrunlock(&ws->global_bo_list.lock);
}

static void
radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);

   radv_amdgpu_log_bo(ws, bo, true);

   if (bo->is_virtual) {
      int r;

      /* Clear mappings of this PRT VA region. */
      r = radv_amdgpu_bo_va_op(ws, NULL, 0, bo->size, bo->base.va, 0, 0, AMDGPU_VA_OP_CLEAR);
      if (r) {
         fprintf(stderr, "radv/amdgpu: Failed to clear a PRT VA region (%d).\n", r);
      }

      free(bo->bos);
      free(bo->ranges);
      u_rwlock_destroy(&bo->lock);
   } else {
      if (ws->debug_all_bos)
         radv_amdgpu_global_bo_list_del(ws, bo);
      radv_amdgpu_bo_va_op(ws, bo->bo, 0, bo->size, bo->base.va, 0, 0, AMDGPU_VA_OP_UNMAP);
      amdgpu_bo_free(bo->bo);
   }

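   /* Undo the heap accounting that was added at allocation/import time. */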
   if (bo->base.initial_domain & RADEON_DOMAIN_VRAM) {
      if (bo->base.vram_no_cpu_access) {
         p_atomic_add(&ws->allocated_vram, -align64(bo->size, ws->info.gart_page_size));
      } else {
         p_atomic_add(&ws->allocated_vram_vis, -align64(bo->size, ws->info.gart_page_size));
      }
   }

   if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt, -align64(bo->size, ws->info.gart_page_size));

   amdgpu_va_range_free(bo->va_handle);
   FREE(bo);
}

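/* Allocate a winsys BO: reserve a VA range and either set it up as a virtual (sparse)
 * BO backed by a PRT mapping, or allocate a real amdgpu BO and map it at the VA.
 */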
static VkResult
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned alignment,
                             enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags, unsigned priority,
                             uint64_t replay_address, struct radeon_winsys_bo **out_bo)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo;
   struct amdgpu_bo_alloc_request request = {0};
   struct radv_amdgpu_map_range *ranges = NULL;
   amdgpu_bo_handle buf_handle;
   uint64_t va = 0;
   amdgpu_va_handle va_handle;
   int r;
   VkResult result = VK_SUCCESS;

   /* Just be robust for callers that might use NULL-ness for determining if things should be freed.
    */
   *out_bo = NULL;

   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo) {
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   unsigned virt_alignment = alignment;
   if (size >= ws->info.pte_fragment_size)
      virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);

   assert(!replay_address || (flags & RADEON_FLAG_REPLAYABLE));

   const uint64_t va_flags = AMDGPU_VA_RANGE_HIGH | (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
                             (flags & RADEON_FLAG_REPLAYABLE ? AMDGPU_VA_RANGE_REPLAYABLE : 0);
   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, virt_alignment, replay_address, &va,
                             &va_handle, va_flags);
   if (r) {
      result = replay_address ? VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS : VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto error_va_alloc;
   }

   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->size = size;
   bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);

   if (flags & RADEON_FLAG_VIRTUAL) {
      ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
      if (!ranges) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto error_ranges_alloc;
      }

      u_rwlock_init(&bo->lock);

      bo->ranges = ranges;
      bo->range_count = 1;
      bo->range_capacity = 1;

      bo->ranges[0].offset = 0;
      bo->ranges[0].size = size;
      bo->ranges[0].bo = NULL;
      bo->ranges[0].bo_offset = 0;

      /* Reserve a PRT VA region. */
      r = radv_amdgpu_bo_va_op(ws, NULL, 0, size, bo->base.va, 0, AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_MAP);
      if (r) {
         fprintf(stderr, "radv/amdgpu: Failed to reserve a PRT VA region (%d).\n", r);
         result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
         goto error_ranges_alloc;
      }

      radv_amdgpu_log_bo(ws, bo, false);

      *out_bo = (struct radeon_winsys_bo *)bo;
      return VK_SUCCESS;
   }

   request.alloc_size = size;
   request.phys_alignment = alignment;

   if (initial_domain & RADEON_DOMAIN_VRAM) {
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;

      /* Since VRAM and GTT have almost the same performance on
       * APUs, we could just set GTT. However, in order to decrease
       * GTT (RAM) usage, which is shared with the OS, allow VRAM
       * placements too. The idea is not that VRAM is particularly
       * useful here, but to use it so that it isn't left unused
       * and wasted.
       *
       * Furthermore, even on discrete GPUs this is beneficial. If
       * both GTT and VRAM are set, AMDGPU still prefers VRAM for
       * the initial placement, but it makes the buffers spillable.
       * Otherwise AMDGPU tries very hard to place the buffers in
       * VRAM, to the extent that we get a lot of unnecessary
       * buffer movement. This helps significantly when e.g.
       * Horizon Zero Dawn allocates more memory than we have VRAM.
       */
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
   }

   if (initial_domain & RADEON_DOMAIN_GTT)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
   if (initial_domain & RADEON_DOMAIN_GDS)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
   if (initial_domain & RADEON_DOMAIN_OA)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;

   if (flags & RADEON_FLAG_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS) {
      bo->base.vram_no_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
      request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
   }
   if (flags & RADEON_FLAG_GTT_WC)
      request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
   if (!(flags & RADEON_FLAG_IMPLICIT_SYNC))
      request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
   if ((initial_domain & RADEON_DOMAIN_VRAM_GTT) && (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING) &&
       ((ws->perftest & RADV_PERFTEST_LOCAL_BOS) || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
      bo->base.is_local = true;
      request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
   }

   if (initial_domain & RADEON_DOMAIN_VRAM) {
      if (ws->zero_all_vram_allocs || (flags & RADEON_FLAG_ZERO_VRAM))
         request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
   }

   if (flags & RADEON_FLAG_DISCARDABLE && ws->info.drm_minor >= 47)
      request.flags |= AMDGPU_GEM_CREATE_DISCARDABLE;

   r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
   if (r) {
      fprintf(stderr, "radv/amdgpu: Failed to allocate a buffer:\n");
      fprintf(stderr, "radv/amdgpu:    size      : %" PRIu64 " bytes\n", size);
      fprintf(stderr, "radv/amdgpu:    alignment : %u bytes\n", alignment);
      fprintf(stderr, "radv/amdgpu:    domains   : %u\n", initial_domain);
      result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto error_bo_alloc;
   }

   r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags, 0, AMDGPU_VA_OP_MAP);
   if (r) {
      result = VK_ERROR_UNKNOWN;
      goto error_va_map;
   }

   bo->bo = buf_handle;
   bo->base.initial_domain = initial_domain;
   bo->base.use_global_list = false;
   bo->priority = priority;

   r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   if (initial_domain & RADEON_DOMAIN_VRAM) {
      /* Buffers allocated in VRAM with the NO_CPU_ACCESS flag
       * aren't mappable and they are counted as part of the VRAM
       * counter.
       *
       * Otherwise, buffers with the CPU_ACCESS flag or with neither
       * flag (imported buffers) are counted as part of the visible
       * VRAM counter because they can be mapped.
       */
      if (bo->base.vram_no_cpu_access) {
         p_atomic_add(&ws->allocated_vram, align64(bo->size, ws->info.gart_page_size));
      } else {
         p_atomic_add(&ws->allocated_vram_vis, align64(bo->size, ws->info.gart_page_size));
      }
   }

   if (initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt, align64(bo->size, ws->info.gart_page_size));

   if (ws->debug_all_bos)
      radv_amdgpu_global_bo_list_add(ws, bo);
   radv_amdgpu_log_bo(ws, bo, false);

   *out_bo = (struct radeon_winsys_bo *)bo;
   return VK_SUCCESS;
error_va_map:
   amdgpu_bo_free(buf_handle);

error_bo_alloc:
   free(ranges);

error_ranges_alloc:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   FREE(bo);
   return result;
}

static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   int ret;
   void *data;
   ret = amdgpu_bo_cpu_map(bo->bo, &data);
   if (ret)
      return NULL;
   return data;
}

static void
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   amdgpu_bo_cpu_unmap(bo->bo);
}

static uint64_t
radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws, uint64_t size, unsigned alignment)
{
   uint64_t vm_alignment = alignment;

   /* Increase the VM alignment for faster address translation. */
   if (size >= ws->info.pte_fragment_size)
      vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);

   /* Gfx9: Increase the VM alignment to the most significant bit set
    * in the size for faster address translation.
    */
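   /* For example, a 6 MiB buffer (highest set bit = 2^22) gets a 4 MiB VM alignment. */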
   if (ws->info.gfx_level >= GFX9) {
      unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
      uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;

      vm_alignment = MAX2(vm_alignment, msb_alignment);
   }
   return vm_alignment;
}

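/* Wrap an existing CPU allocation (userptr) in a winsys BO and map it into the GPU VA space. */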
static VkResult
radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws, void *pointer, uint64_t size, unsigned priority,
                               struct radeon_winsys_bo **out_bo)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   amdgpu_bo_handle buf_handle;
   struct radv_amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;
   uint64_t vm_alignment;
   VkResult result = VK_SUCCESS;
   int ret;

   /* Just be robust for callers that might use NULL-ness for determining if things should be freed.
    */
   *out_bo = NULL;

   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   ret = amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle);
   if (ret) {
      if (ret == -EINVAL) {
         result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
      } else {
         result = VK_ERROR_UNKNOWN;
      }
      goto error;
   }

   /* Using the optimal VM alignment also fixes GPU hangs for buffers that
    * are imported.
    */
   vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size, ws->info.gart_page_size);

   if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, vm_alignment, 0, &va, &va_handle,
                             AMDGPU_VA_RANGE_HIGH)) {
      result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto error_va_alloc;
   }

   if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP)) {
      result = VK_ERROR_UNKNOWN;
      goto error_va_map;
   }

   /* Initialize it */
   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->size = size;
   bo->bo = buf_handle;
   bo->base.initial_domain = RADEON_DOMAIN_GTT;
   bo->base.use_global_list = false;
   bo->priority = priority;

   ASSERTED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   p_atomic_add(&ws->allocated_gtt, align64(bo->size, ws->info.gart_page_size));

   if (ws->debug_all_bos)
      radv_amdgpu_global_bo_list_add(ws, bo);
   radv_amdgpu_log_bo(ws, bo, false);

   *out_bo = (struct radeon_winsys_bo *)bo;
   return VK_SUCCESS;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error:
   FREE(bo);
   return result;
}

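/* Import a BO from a dma-buf fd and map it into the GPU VA space. */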
static VkResult
radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, int fd, unsigned priority, struct radeon_winsys_bo **out_bo,
                              uint64_t *alloc_size)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;
   enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
   struct amdgpu_bo_import_result result;
   struct amdgpu_bo_info info;
   enum radeon_bo_domain initial = 0;
   int r;
   VkResult vk_result = VK_SUCCESS;

   /* Just be robust for callers that might use NULL-ness for determining if things should be freed.
    */
   *out_bo = NULL;

   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   r = amdgpu_bo_import(ws->dev, type, fd, &result);
   if (r) {
      vk_result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
      goto error;
   }

   r = amdgpu_bo_query_info(result.buf_handle, &info);
   if (r) {
      vk_result = VK_ERROR_UNKNOWN;
      goto error_query;
   }

   if (alloc_size) {
      *alloc_size = info.alloc_size;
   }

   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, result.alloc_size, 1 << 20, 0, &va, &va_handle,
                             AMDGPU_VA_RANGE_HIGH);
   if (r) {
      vk_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto error_query;
   }

   r = radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size, va, 0, 0, AMDGPU_VA_OP_MAP);
   if (r) {
      vk_result = VK_ERROR_UNKNOWN;
      goto error_va_map;
   }

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      initial |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      initial |= RADEON_DOMAIN_GTT;

   bo->bo = result.buf_handle;
   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->base.initial_domain = initial;
   bo->base.use_global_list = false;
   bo->size = result.alloc_size;
   bo->priority = priority;

   r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   if (bo->base.initial_domain & RADEON_DOMAIN_VRAM)
      p_atomic_add(&ws->allocated_vram, align64(bo->size, ws->info.gart_page_size));
   if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt, align64(bo->size, ws->info.gart_page_size));

   if (ws->debug_all_bos)
      radv_amdgpu_global_bo_list_add(ws, bo);
   radv_amdgpu_log_bo(ws, bo, false);

   *out_bo = (struct radeon_winsys_bo *)bo;
   return VK_SUCCESS;
error_va_map:
   amdgpu_va_range_free(va_handle);

error_query:
   amdgpu_bo_free(result.buf_handle);

error:
   FREE(bo);
   return vk_result;
}

static bool
radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo, int *fd)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
   int r;
   unsigned handle;
   r = amdgpu_bo_export(bo->bo, type, &handle);
   if (r)
      return false;

   *fd = (int)handle;
   return true;
}

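/* Query the domains and flags of a dma-buf without keeping a reference to the imported BO. */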
static bool
radv_amdgpu_bo_get_flags_from_fd(struct radeon_winsys *_ws, int fd, enum radeon_bo_domain *domains,
                                 enum radeon_bo_flag *flags)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct amdgpu_bo_import_result result = {0};
   struct amdgpu_bo_info info = {0};
   int r;

   *domains = 0;
   *flags = 0;

   r = amdgpu_bo_import(ws->dev, amdgpu_bo_handle_type_dma_buf_fd, fd, &result);
   if (r)
      return false;

   r = amdgpu_bo_query_info(result.buf_handle, &info);
   amdgpu_bo_free(result.buf_handle);
   if (r)
      return false;

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      *domains |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      *domains |= RADEON_DOMAIN_GTT;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GDS)
      *domains |= RADEON_DOMAIN_GDS;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_OA)
      *domains |= RADEON_DOMAIN_OA;

   if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
      *flags |= RADEON_FLAG_CPU_ACCESS;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
      *flags |= RADEON_FLAG_NO_CPU_ACCESS;
   if (!(info.alloc_flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
      *flags |= RADEON_FLAG_IMPLICIT_SYNC;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
      *flags |= RADEON_FLAG_GTT_WC;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
      *flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_PREFER_LOCAL_BO;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
      *flags |= RADEON_FLAG_ZERO_VRAM;
   return true;
}

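/* Translate the TILE_SPLIT tiling field into a tile split in bytes, and back (below). */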
static unsigned
eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0:
      tile_split = 64;
      break;
   case 1:
      tile_split = 128;
      break;
   case 2:
      tile_split = 256;
      break;
   case 3:
      tile_split = 512;
      break;
   default:
   case 4:
      tile_split = 1024;
      break;
   case 5:
      tile_split = 2048;
      break;
   case 6:
      tile_split = 4096;
      break;
   }
   return tile_split;
}

static unsigned
radv_eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64:
      return 0;
   case 128:
      return 1;
   case 256:
      return 2;
   case 512:
      return 3;
   default:
   case 1024:
      return 4;
   case 2048:
      return 5;
   case 4096:
      return 6;
   }
}

#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45
#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK  0x3

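/* Pack the radeon_bo_metadata into amdgpu tiling flags and attach it to the BO. */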
static void
radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   struct amdgpu_bo_metadata metadata = {0};
   uint64_t tiling_flags = 0;

   if (ws->info.gfx_level >= GFX9) {
      tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
      tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, md->u.gfx9.dcc_offset_256b);
      tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, md->u.gfx9.dcc_pitch_max);
      tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, md->u.gfx9.dcc_independent_64b_blocks);
      tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, md->u.gfx9.dcc_independent_128b_blocks);
      tiling_flags |= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, md->u.gfx9.dcc_max_compressed_block_size);
      tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
   } else {
      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
      else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
      else
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

      tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
      tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
      tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
      if (md->u.legacy.tile_split)
         tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
      tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
      tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks) - 1);

      if (md->u.legacy.scanout)
         tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
      else
         tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
   }

   metadata.tiling_info = tiling_flags;
   metadata.size_metadata = md->size_metadata;
   memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

   amdgpu_bo_set_metadata(bo->bo, &metadata);
}

static void
radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   struct amdgpu_bo_info info = {0};

   int r = amdgpu_bo_query_info(bo->bo, &info);
   if (r)
      return;

   uint64_t tiling_flags = info.metadata.tiling_info;

   if (ws->info.gfx_level >= GFX9) {
      md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
      md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
   } else {
      md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
      md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;

      if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
         md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
      else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
         md->u.legacy.microtile = RADEON_LAYOUT_TILED;

      md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
      md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
      md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
      md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
      md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
      md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
      md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
   }

   md->size_metadata = info.metadata.size_metadata;
   memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

static VkResult
radv_amdgpu_winsys_bo_make_resident(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo, bool resident)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   VkResult result = VK_SUCCESS;

   /* Do not add the BO to the global list if it's a local BO because the
    * kernel maintains a list for us.
    */
   if (bo->base.is_local)
      return VK_SUCCESS;

   /* Do not add the BO twice to the global list if the allbos debug
    * option is enabled.
    */
   if (ws->debug_all_bos)
      return VK_SUCCESS;

   if (resident) {
      result = radv_amdgpu_global_bo_list_add(ws, bo);
   } else {
      radv_amdgpu_global_bo_list_del(ws, bo);
   }

   return result;
}

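/* qsort comparator that orders BOs by GPU VA, used for dumping BO ranges. */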
static int
radv_amdgpu_bo_va_compare(const void *a, const void *b)
{
   const struct radv_amdgpu_winsys_bo *bo_a = *(const struct radv_amdgpu_winsys_bo *const *)a;
   const struct radv_amdgpu_winsys_bo *bo_b = *(const struct radv_amdgpu_winsys_bo *const *)b;
   return bo_a->base.va < bo_b->base.va ? -1 : bo_a->base.va > bo_b->base.va ? 1 : 0;
}

static uint64_t
radv_amdgpu_canonicalize_va(uint64_t va)
{
   /* It would be less hardcoded to use addr32_hi (0xffff8000) to generate a mask, but
    * there are confusing differences with the page fault reports from the kernel, which
    * seem to report the top 48 bits, whereas addr32_hi covers 47 bits. */
   return va & ((1ull << 48) - 1);
}

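/* Dump the recorded BO creation/destruction events (see radv_amdgpu_log_bo()). */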
static void
radv_amdgpu_dump_bo_log(struct radeon_winsys *_ws, FILE *file)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo_log *bo_log;

   if (!ws->debug_log_bos)
      return;

   u_rwlock_rdlock(&ws->log_bo_list_lock);
   LIST_FOR_EACH_ENTRY (bo_log, &ws->log_bo_list, list) {
      fprintf(file, "timestamp=%llu, VA=%.16llx-%.16llx, destroyed=%d, is_virtual=%d\n", (long long)bo_log->timestamp,
              (long long)radv_amdgpu_canonicalize_va(bo_log->va),
              (long long)radv_amdgpu_canonicalize_va(bo_log->va + bo_log->size), bo_log->destroyed, bo_log->is_virtual);
   }
   u_rwlock_rdunlock(&ws->log_bo_list_lock);
}

static void
radv_amdgpu_dump_bo_ranges(struct radeon_winsys *_ws, FILE *file)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   if (ws->debug_all_bos) {
      struct radv_amdgpu_winsys_bo **bos = NULL;
      int i = 0;

      u_rwlock_rdlock(&ws->global_bo_list.lock);
      bos = malloc(sizeof(*bos) * ws->global_bo_list.count);
      if (!bos) {
         u_rwlock_rdunlock(&ws->global_bo_list.lock);
         fprintf(file, "  Failed to allocate memory to sort VA ranges for dumping\n");
         return;
      }

      for (i = 0; i < ws->global_bo_list.count; i++) {
         bos[i] = ws->global_bo_list.bos[i];
      }
      qsort(bos, ws->global_bo_list.count, sizeof(bos[0]), radv_amdgpu_bo_va_compare);

      for (i = 0; i < ws->global_bo_list.count; ++i) {
         fprintf(file, "  VA=%.16llx-%.16llx, handle=%d\n", (long long)radv_amdgpu_canonicalize_va(bos[i]->base.va),
                 (long long)radv_amdgpu_canonicalize_va(bos[i]->base.va + bos[i]->size), bos[i]->bo_handle);
      }
      free(bos);
      u_rwlock_rdunlock(&ws->global_bo_list.lock);
   } else
      fprintf(file, "  To get BO VA ranges, please specify RADV_DEBUG=allbos\n");
}
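
/* Plug the BO entry points into the winsys function table. */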
void
radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
   ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
   ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
   ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
   ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
   ws->base.buffer_from_ptr = radv_amdgpu_winsys_bo_from_ptr;
   ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
   ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
   ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
   ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
   ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
   ws->base.buffer_get_flags_from_fd = radv_amdgpu_bo_get_flags_from_fd;
   ws->base.buffer_make_resident = radv_amdgpu_winsys_bo_make_resident;
   ws->base.dump_bo_ranges = radv_amdgpu_dump_bo_ranges;
   ws->base.dump_bo_log = radv_amdgpu_dump_bo_log;
}