/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based on amdgpu winsys.
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdio.h>

#include "radv_amdgpu_bo.h"
#include "radv_debug.h"

#include <amdgpu.h>
#include <inttypes.h>
#include <pthread.h>
#include <unistd.h>
#include "drm-uapi/amdgpu_drm.h"

#include "util/os_time.h"
#include "util/u_atomic.h"
#include "util/u_math.h"
#include "util/u_memory.h"

static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo);

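/* Thin wrapper around amdgpu_bo_va_op_raw(): when a buffer handle is given, derive
 * the VM page flags from the RADEON_FLAG_* bits (readable/executable, writeable
 * unless read-only, uncached on GFX9+ when requested), align the size to the CPU
 * page size and issue the requested map/unmap/PRT operation on the GPU VA range.
 */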
static int
radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, amdgpu_bo_handle bo, uint64_t offset,
                     uint64_t size, uint64_t addr, uint32_t bo_flags, uint64_t internal_flags,
                     uint32_t ops)
{
   uint64_t flags = internal_flags;
   if (bo) {
      flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_EXECUTABLE;

      if ((bo_flags & RADEON_FLAG_VA_UNCACHED) && ws->info.chip_class >= GFX9)
         flags |= AMDGPU_VM_MTYPE_UC;

      if (!(bo_flags & RADEON_FLAG_READ_ONLY))
         flags |= AMDGPU_VM_PAGE_WRITEABLE;
   }

   size = align64(size, getpagesize());

   return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr, flags, ops);
}

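/* Map one range of a virtual (sparse) BO into the GPU VA space. Ranges without a
 * backing BO are mapped as PRT pages when the kernel supports sparse VM mappings;
 * ranges with a backing BO take a reference on it, which the matching unmap drops.
 */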
static void
radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo,
                               const struct radv_amdgpu_map_range *range)
{
   uint64_t internal_flags = 0;
   assert(range->size);

   if (!range->bo) {
      if (!ws->info.has_sparse_vm_mappings)
         return;

      internal_flags |= AMDGPU_VM_PAGE_PRT;
   } else
      p_atomic_inc(&range->bo->ref_count);

   int r = radv_amdgpu_bo_va_op(ws, range->bo ? range->bo->bo : NULL, range->bo_offset, range->size,
                                range->offset + bo->base.va, 0, internal_flags, AMDGPU_VA_OP_MAP);
   if (r)
      abort();
}

static void
radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo,
                                 const struct radv_amdgpu_map_range *range)
{
   uint64_t internal_flags = 0;
   assert(range->size);

   if (!range->bo) {
      if (!ws->info.has_sparse_vm_mappings)
         return;

      /* Even though this is an unmap, if we don't set this flag,
         AMDGPU is going to complain about the missing buffer. */
      internal_flags |= AMDGPU_VM_PAGE_PRT;
   }

   int r = radv_amdgpu_bo_va_op(ws, range->bo ? range->bo->bo : NULL, range->bo_offset, range->size,
                                range->offset + bo->base.va, 0, internal_flags, AMDGPU_VA_OP_UNMAP);
   if (r)
      abort();

   if (range->bo)
      ws->base.buffer_destroy(&ws->base, (struct radeon_winsys_bo *)range->bo);
}

static int
bo_comparator(const void *ap, const void *bp)
{
   struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
   struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
   return (a > b) ? 1 : (a < b) ? -1 : 0;
}

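/* Rebuild bo->bos as the sorted, deduplicated set of backing BOs referenced by the
 * ranges of a virtual BO, growing the array as needed. Ranges without a backing BO
 * are skipped.
 */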
static VkResult
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
   if (bo->bo_capacity < bo->range_count) {
      uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
      struct radv_amdgpu_winsys_bo **bos =
         realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
      if (!bos)
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      bo->bos = bos;
      bo->bo_capacity = new_count;
   }

   uint32_t temp_bo_count = 0;
   for (uint32_t i = 0; i < bo->range_count; ++i)
      if (bo->ranges[i].bo)
         bo->bos[temp_bo_count++] = bo->ranges[i].bo;

   qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);

   /* If no range has a backing BO, the list is empty. */
   uint32_t final_bo_count = temp_bo_count ? 1 : 0;
   for (uint32_t i = 1; i < temp_bo_count; ++i)
      if (bo->bos[i] != bo->bos[i - 1])
         bo->bos[final_bo_count++] = bo->bos[i];

   bo->bo_count = final_bo_count;

   return VK_SUCCESS;
}

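/* Bind (or unbind, when bo is NULL) the range [offset, offset + size) of a virtual
 * BO to the backing BO at bo_offset. Existing ranges that overlap are unmapped and
 * split or merged as needed, the new range is mapped, and the backing BO list is
 * rebuilt.
 *
 * Illustrative example: a fresh virtual BO has a single range [0, 1 MiB) -> NULL.
 * Binding a BO B at offset 256 KiB with size 128 KiB splits it into three ranges:
 * [0, 256 KiB) -> NULL, [256 KiB, 384 KiB) -> B, [384 KiB, 1 MiB) -> NULL.
 */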
static VkResult
radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_winsys_bo *_parent,
                                   uint64_t offset, uint64_t size, struct radeon_winsys_bo *_bo,
                                   uint64_t bo_offset)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
   struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
   int range_count_delta, new_idx;
   int first = 0, last;
   struct radv_amdgpu_map_range new_first, new_last;
   VkResult result;

   assert(parent->is_virtual);
   assert(!bo || !bo->is_virtual);

   /* We have at most 2 new ranges (1 by the bind, and another one by splitting a range that
    * contains the newly bound range). */
   if (parent->range_capacity - parent->range_count < 2) {
      uint32_t range_capacity = parent->range_capacity + 2;
      struct radv_amdgpu_map_range *ranges =
         realloc(parent->ranges, range_capacity * sizeof(struct radv_amdgpu_map_range));
      if (!ranges)
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      parent->ranges = ranges;
      parent->range_capacity = range_capacity;
   }

   /*
    * [first, last] is exactly the range of ranges that either overlap the
    * newly bound range, or are adjacent to it. This corresponds to the bind
    * ranges that may change.
    */
   while (first + 1 < parent->range_count &&
          parent->ranges[first].offset + parent->ranges[first].size < offset)
      ++first;

   last = first;
   while (last + 1 < parent->range_count && parent->ranges[last + 1].offset <= offset + size)
      ++last;

   /* Whether the first or last range is going to be totally removed or just
    * resized/left alone. Note that in the case of first == last, we will split
    * this into a part before and after the new range. The remove flag is then
    * whether to not create the corresponding split part. */
   bool remove_first = parent->ranges[first].offset == offset;
   bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
   bool unmapped_first = false;

   assert(parent->ranges[first].offset <= offset);
   assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);

   /* Try to merge the new range with the first range. */
   if (parent->ranges[first].bo == bo &&
       (!bo ||
        offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
      size += offset - parent->ranges[first].offset;
      offset = parent->ranges[first].offset;
      bo_offset = parent->ranges[first].bo_offset;
      remove_first = true;
   }

   /* Try to merge the new range with the last range. */
   if (parent->ranges[last].bo == bo &&
       (!bo ||
        offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
      size = parent->ranges[last].offset + parent->ranges[last].size - offset;
      remove_last = true;
   }

   range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
   new_idx = first + !remove_first;

   /* Any range between first and last is going to be entirely covered by the new range so just
    * unmap them. */
   for (int i = first + 1; i < last; ++i)
      radv_amdgpu_winsys_virtual_unmap(ws, parent, parent->ranges + i);

   /* If the first/last range is not left alone, we unmap it and optionally map
    * it again after modifications. Note that this implicitly can do the splitting
    * if first == last. */
   new_first = parent->ranges[first];
   new_last = parent->ranges[last];

   if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
      radv_amdgpu_winsys_virtual_unmap(ws, parent, parent->ranges + first);
      unmapped_first = true;

      if (!remove_first) {
         new_first.size = offset - new_first.offset;
         radv_amdgpu_winsys_virtual_map(ws, parent, &new_first);
      }
   }

   if (parent->ranges[last].offset < offset + size || remove_last) {
      if (first != last || !unmapped_first)
         radv_amdgpu_winsys_virtual_unmap(ws, parent, parent->ranges + last);

      if (!remove_last) {
         new_last.size -= offset + size - new_last.offset;
         new_last.bo_offset += (offset + size - new_last.offset);
         new_last.offset = offset + size;
         radv_amdgpu_winsys_virtual_map(ws, parent, &new_last);
      }
   }

   /* Move the range list after last to account for the changed number of ranges. */
   memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
           sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));

   if (!remove_first)
      parent->ranges[first] = new_first;

   if (!remove_last)
      parent->ranges[new_idx + 1] = new_last;

   /* Actually set up the new range. */
   parent->ranges[new_idx].offset = offset;
   parent->ranges[new_idx].size = size;
   parent->ranges[new_idx].bo = bo;
   parent->ranges[new_idx].bo_offset = bo_offset;

   radv_amdgpu_winsys_virtual_map(ws, parent, parent->ranges + new_idx);

   parent->range_count += range_count_delta;

   result = radv_amdgpu_winsys_rebuild_bo_list(parent);
   if (result != VK_SUCCESS)
      return result;

   return VK_SUCCESS;
}

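/* Entry of the BO history log kept when debug_log_bos is enabled: every allocation
 * and destruction is recorded with its VA range and a CPU timestamp so the log can
 * later be dumped for debugging (see radv_amdgpu_dump_bo_log).
 */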
struct radv_amdgpu_winsys_bo_log {
   struct list_head list;
   uint64_t va;
   uint64_t size;
   uint64_t timestamp; /* CPU timestamp */
   uint8_t is_virtual : 1;
   uint8_t destroyed : 1;
};

static void
radv_amdgpu_log_bo(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo, bool destroyed)
{
   struct radv_amdgpu_winsys_bo_log *bo_log = NULL;

   if (!ws->debug_log_bos)
      return;

   bo_log = malloc(sizeof(*bo_log));
   if (!bo_log)
      return;

   bo_log->va = bo->base.va;
   bo_log->size = bo->size;
   bo_log->timestamp = os_time_get_nano();
   bo_log->is_virtual = bo->is_virtual;
   bo_log->destroyed = destroyed;

   u_rwlock_wrlock(&ws->log_bo_list_lock);
   list_addtail(&bo_log->list, &ws->log_bo_list);
   u_rwlock_wrunlock(&ws->log_bo_list_lock);
}

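/* Helpers for the rwlock-protected global BO list: with the allbos debug option
 * every BO is added at creation time, otherwise BOs are added/removed when they
 * are explicitly made resident (see radv_amdgpu_winsys_bo_make_resident).
 */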
static VkResult
radv_amdgpu_global_bo_list_add(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo)
{
   u_rwlock_wrlock(&ws->global_bo_list.lock);
   if (ws->global_bo_list.count == ws->global_bo_list.capacity) {
      unsigned capacity = MAX2(4, ws->global_bo_list.capacity * 2);
      void *data =
         realloc(ws->global_bo_list.bos, capacity * sizeof(struct radv_amdgpu_winsys_bo *));
      if (!data) {
         u_rwlock_wrunlock(&ws->global_bo_list.lock);
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      }

      ws->global_bo_list.bos = (struct radv_amdgpu_winsys_bo **)data;
      ws->global_bo_list.capacity = capacity;
   }

   ws->global_bo_list.bos[ws->global_bo_list.count++] = bo;
   bo->base.use_global_list = true;
   u_rwlock_wrunlock(&ws->global_bo_list.lock);
   return VK_SUCCESS;
}

static void
radv_amdgpu_global_bo_list_del(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo)
{
   u_rwlock_wrlock(&ws->global_bo_list.lock);
   for (unsigned i = ws->global_bo_list.count; i-- > 0;) {
      if (ws->global_bo_list.bos[i] == bo) {
         ws->global_bo_list.bos[i] = ws->global_bo_list.bos[ws->global_bo_list.count - 1];
         --ws->global_bo_list.count;
         bo->base.use_global_list = false;
         break;
      }
   }
   u_rwlock_wrunlock(&ws->global_bo_list.lock);
}

static void
radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);

   if (p_atomic_dec_return(&bo->ref_count))
      return;

   radv_amdgpu_log_bo(ws, bo, true);

   if (bo->is_virtual) {
      for (uint32_t i = 0; i < bo->range_count; ++i) {
         radv_amdgpu_winsys_virtual_unmap(ws, bo, bo->ranges + i);
      }
      free(bo->bos);
      free(bo->ranges);
   } else {
      if (ws->debug_all_bos)
         radv_amdgpu_global_bo_list_del(ws, bo);
      radv_amdgpu_bo_va_op(ws, bo->bo, 0, bo->size, bo->base.va, 0, 0, AMDGPU_VA_OP_UNMAP);
      amdgpu_bo_free(bo->bo);
   }

   if (bo->base.initial_domain & RADEON_DOMAIN_VRAM) {
      if (bo->base.vram_no_cpu_access) {
         p_atomic_add(&ws->allocated_vram, -align64(bo->size, ws->info.gart_page_size));
      } else {
         p_atomic_add(&ws->allocated_vram_vis, -align64(bo->size, ws->info.gart_page_size));
      }
   }

   if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt, -align64(bo->size, ws->info.gart_page_size));

   amdgpu_va_range_free(bo->va_handle);
   FREE(bo);
}

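/* Create a winsys BO: reserve a GPU VA range first, then either set up a virtual
 * (sparse) BO that starts out with a single unbacked range, or allocate a real
 * buffer in the requested domains, map it at the reserved VA and update the
 * memory accounting.
 */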
static VkResult
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned alignment,
                             enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags,
                             unsigned priority, uint64_t replay_address,
                             struct radeon_winsys_bo **out_bo)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo;
   struct amdgpu_bo_alloc_request request = {0};
   struct radv_amdgpu_map_range *ranges = NULL;
   amdgpu_bo_handle buf_handle;
   uint64_t va = 0;
   amdgpu_va_handle va_handle;
   int r;
   VkResult result = VK_SUCCESS;

   /* Just be robust for callers that might use NULL-ness for determining if things should be freed.
    */
   *out_bo = NULL;

   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo) {
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   unsigned virt_alignment = alignment;
   if (size >= ws->info.pte_fragment_size)
      virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);

   assert(!replay_address || (flags & RADEON_FLAG_REPLAYABLE));

   const uint64_t va_flags = AMDGPU_VA_RANGE_HIGH |
                             (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
                             (flags & RADEON_FLAG_REPLAYABLE ? AMDGPU_VA_RANGE_REPLAYABLE : 0);
   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, virt_alignment, replay_address,
                             &va, &va_handle, va_flags);
   if (r) {
      result =
         replay_address ? VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS : VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto error_va_alloc;
   }

   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->size = size;
   bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
   bo->ref_count = 1;

   if (flags & RADEON_FLAG_VIRTUAL) {
      ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
      if (!ranges) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto error_ranges_alloc;
      }

      bo->ranges = ranges;
      bo->range_count = 1;
      bo->range_capacity = 1;

      bo->ranges[0].offset = 0;
      bo->ranges[0].size = size;
      bo->ranges[0].bo = NULL;
      bo->ranges[0].bo_offset = 0;

      radv_amdgpu_winsys_virtual_map(ws, bo, bo->ranges);
      radv_amdgpu_log_bo(ws, bo, false);

      *out_bo = (struct radeon_winsys_bo *)bo;
      return VK_SUCCESS;
   }

   request.alloc_size = size;
   request.phys_alignment = alignment;

   if (initial_domain & RADEON_DOMAIN_VRAM) {
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;

      /* Since VRAM and GTT have almost the same performance on
       * APUs, we could just set GTT. However, in order to decrease
       * GTT(RAM) usage, which is shared with the OS, allow VRAM
       * placements too. The idea is not to use VRAM usefully, but
       * to use it so that it's not unused and wasted.
       *
       * Furthermore, even on discrete GPUs this is beneficial. If
       * both GTT and VRAM are set then AMDGPU still prefers VRAM
       * for the initial placement, but it makes the buffers
       * spillable. Otherwise AMDGPU tries to place the buffers in
       * VRAM really hard to the extent that we are getting a lot
       * of unnecessary movement. This helps significantly when
       * e.g. Horizon Zero Dawn allocates more memory than we have
       * VRAM.
       */
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
   }

   if (initial_domain & RADEON_DOMAIN_GTT)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
   if (initial_domain & RADEON_DOMAIN_GDS)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
   if (initial_domain & RADEON_DOMAIN_OA)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;

   if (flags & RADEON_FLAG_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS) {
      bo->base.vram_no_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
      request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
   }
   if (flags & RADEON_FLAG_GTT_WC)
      request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
   if (!(flags & RADEON_FLAG_IMPLICIT_SYNC))
      request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
   if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
       ((ws->perftest & RADV_PERFTEST_LOCAL_BOS) || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
      bo->base.is_local = true;
      request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
   }

   /* This won't do anything on pre-4.9 kernels. */
   if (initial_domain & RADEON_DOMAIN_VRAM) {
      if (ws->zero_all_vram_allocs || (flags & RADEON_FLAG_ZERO_VRAM))
         request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
   }

   r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
   if (r) {
      fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
      fprintf(stderr, "amdgpu:    size      : %" PRIu64 " bytes\n", size);
      fprintf(stderr, "amdgpu:    alignment : %u bytes\n", alignment);
      fprintf(stderr, "amdgpu:    domains   : %u\n", initial_domain);
      result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto error_bo_alloc;
   }

   r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags, 0, AMDGPU_VA_OP_MAP);
   if (r) {
      result = VK_ERROR_UNKNOWN;
      goto error_va_map;
   }

   bo->bo = buf_handle;
   bo->base.initial_domain = initial_domain;
   bo->base.use_global_list = bo->base.is_local;
   bo->is_shared = false;
   bo->priority = priority;

   r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   if (initial_domain & RADEON_DOMAIN_VRAM) {
      /* Buffers allocated in VRAM with the NO_CPU_ACCESS flag
       * aren't mappable and they are counted as part of the VRAM
       * counter.
       *
       * Otherwise, buffers with the CPU_ACCESS flag or without
       * either flag (imported buffers) are counted as part of the
       * VRAM visible counter because they can be mapped.
       */
      if (bo->base.vram_no_cpu_access) {
         p_atomic_add(&ws->allocated_vram, align64(bo->size, ws->info.gart_page_size));
      } else {
         p_atomic_add(&ws->allocated_vram_vis, align64(bo->size, ws->info.gart_page_size));
      }
   }

   if (initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt, align64(bo->size, ws->info.gart_page_size));

   if (ws->debug_all_bos)
      radv_amdgpu_global_bo_list_add(ws, bo);
   radv_amdgpu_log_bo(ws, bo, false);

   *out_bo = (struct radeon_winsys_bo *)bo;
   return VK_SUCCESS;
error_va_map:
   amdgpu_bo_free(buf_handle);

error_bo_alloc:
   free(ranges);

error_ranges_alloc:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   FREE(bo);
   return result;
}

static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   int ret;
   void *data;
   ret = amdgpu_bo_cpu_map(bo->bo, &data);
   if (ret)
      return NULL;
   return data;
}

static void
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   amdgpu_bo_cpu_unmap(bo->bo);
}

static uint64_t
radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws, uint64_t size,
                                     unsigned alignment)
{
   uint64_t vm_alignment = alignment;

   /* Increase the VM alignment for faster address translation. */
   if (size >= ws->info.pte_fragment_size)
      vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);

   /* Gfx9: Increase the VM alignment to the most significant bit set
    * in the size for faster address translation.
    */
   if (ws->info.chip_class >= GFX9) {
      unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
      uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;

      vm_alignment = MAX2(vm_alignment, msb_alignment);
   }
   return vm_alignment;
}

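/* Import CPU memory given by a user pointer as a GTT BO: wrap it with
 * amdgpu_create_bo_from_user_mem(), reserve a VA range with the optimal VM
 * alignment and map it. The memory is accounted as GTT.
 */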
static VkResult
radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws, void *pointer, uint64_t size,
                               unsigned priority, struct radeon_winsys_bo **out_bo)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   amdgpu_bo_handle buf_handle;
   struct radv_amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;
   uint64_t vm_alignment;
   VkResult result = VK_SUCCESS;

   /* Just be robust for callers that might use NULL-ness for determining if things should be freed.
    */
   *out_bo = NULL;

   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle)) {
      result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto error;
   }

   /* Using the optimal VM alignment also fixes GPU hangs for buffers that
    * are imported.
    */
   vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size, ws->info.gart_page_size);

   if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, vm_alignment, 0, &va,
                             &va_handle, AMDGPU_VA_RANGE_HIGH)) {
      result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto error_va_alloc;
   }

   if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP)) {
      result = VK_ERROR_UNKNOWN;
      goto error_va_map;
   }

   /* Initialize it */
   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->size = size;
   bo->ref_count = 1;
   bo->bo = buf_handle;
   bo->base.initial_domain = RADEON_DOMAIN_GTT;
   bo->base.use_global_list = false;
   bo->priority = priority;

   ASSERTED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   p_atomic_add(&ws->allocated_gtt, align64(bo->size, ws->info.gart_page_size));

   if (ws->debug_all_bos)
      radv_amdgpu_global_bo_list_add(ws, bo);
   radv_amdgpu_log_bo(ws, bo, false);

   *out_bo = (struct radeon_winsys_bo *)bo;
   return VK_SUCCESS;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error:
   FREE(bo);
   return result;
}

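/* Import a BO from a dma-buf file descriptor: query its size and preferred heap,
 * reserve a 1 MiB aligned VA range and map it, then account it against VRAM and/or
 * GTT based on the preferred heap reported by the kernel.
 */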
static VkResult
radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, int fd, unsigned priority,
                              struct radeon_winsys_bo **out_bo, uint64_t *alloc_size)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;
   enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
   struct amdgpu_bo_import_result result = {0};
   struct amdgpu_bo_info info = {0};
   enum radeon_bo_domain initial = 0;
   int r;
   VkResult vk_result = VK_SUCCESS;

   /* Just be robust for callers that might use NULL-ness for determining if things should be freed.
    */
   *out_bo = NULL;

   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   r = amdgpu_bo_import(ws->dev, type, fd, &result);
   if (r) {
      vk_result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
      goto error;
   }

   r = amdgpu_bo_query_info(result.buf_handle, &info);
   if (r) {
      vk_result = VK_ERROR_UNKNOWN;
      goto error_query;
   }

   if (alloc_size) {
      *alloc_size = info.alloc_size;
   }

   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, result.alloc_size, 1 << 20, 0,
                             &va, &va_handle, AMDGPU_VA_RANGE_HIGH);
   if (r) {
      vk_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto error_query;
   }

   r =
      radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size, va, 0, 0, AMDGPU_VA_OP_MAP);
   if (r) {
      vk_result = VK_ERROR_UNKNOWN;
      goto error_va_map;
   }

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      initial |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      initial |= RADEON_DOMAIN_GTT;

   bo->bo = result.buf_handle;
   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->base.initial_domain = initial;
   bo->base.use_global_list = false;
   bo->size = result.alloc_size;
   bo->is_shared = true;
   bo->priority = priority;

   r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   if (bo->base.initial_domain & RADEON_DOMAIN_VRAM)
      p_atomic_add(&ws->allocated_vram, align64(bo->size, ws->info.gart_page_size));
   if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt, align64(bo->size, ws->info.gart_page_size));

   if (ws->debug_all_bos)
      radv_amdgpu_global_bo_list_add(ws, bo);
   radv_amdgpu_log_bo(ws, bo, false);

   *out_bo = (struct radeon_winsys_bo *)bo;
   return VK_SUCCESS;
error_va_map:
   amdgpu_va_range_free(va_handle);

error_query:
   amdgpu_bo_free(result.buf_handle);

error:
   FREE(bo);
   return vk_result;
}

static bool
radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo, int *fd)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
   int r;
   unsigned handle;
   r = amdgpu_bo_export(bo->bo, type, &handle);
   if (r)
      return false;

   *fd = (int)handle;
   bo->is_shared = true;
   return true;
}

static bool
radv_amdgpu_bo_get_flags_from_fd(struct radeon_winsys *_ws, int fd, enum radeon_bo_domain *domains,
                                 enum radeon_bo_flag *flags)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct amdgpu_bo_import_result result = {0};
   struct amdgpu_bo_info info = {0};
   int r;

   *domains = 0;
   *flags = 0;

   r = amdgpu_bo_import(ws->dev, amdgpu_bo_handle_type_dma_buf_fd, fd, &result);
   if (r)
      return false;

   r = amdgpu_bo_query_info(result.buf_handle, &info);
   amdgpu_bo_free(result.buf_handle);
   if (r)
      return false;

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      *domains |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      *domains |= RADEON_DOMAIN_GTT;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GDS)
      *domains |= RADEON_DOMAIN_GDS;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_OA)
      *domains |= RADEON_DOMAIN_OA;

   if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
      *flags |= RADEON_FLAG_CPU_ACCESS;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
      *flags |= RADEON_FLAG_NO_CPU_ACCESS;
   if (!(info.alloc_flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
      *flags |= RADEON_FLAG_IMPLICIT_SYNC;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
      *flags |= RADEON_FLAG_GTT_WC;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
      *flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_PREFER_LOCAL_BO;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
      *flags |= RADEON_FLAG_ZERO_VRAM;
   return true;
}

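/* The TILE_SPLIT tiling field stores the split size as a 3-bit code; these two
 * helpers convert between the code (0..6) and the split size in bytes (64..4096).
 */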
static unsigned
eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0:
      tile_split = 64;
      break;
   case 1:
      tile_split = 128;
      break;
   case 2:
      tile_split = 256;
      break;
   case 3:
      tile_split = 512;
      break;
   default:
   case 4:
      tile_split = 1024;
      break;
   case 5:
      tile_split = 2048;
      break;
   case 6:
      tile_split = 4096;
      break;
   }
   return tile_split;
}

static unsigned
radv_eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64:
      return 0;
   case 128:
      return 1;
   case 256:
      return 2;
   case 512:
      return 3;
   default:
   case 1024:
      return 4;
   case 2048:
      return 5;
   case 4096:
      return 6;
   }
}

#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45
#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK  0x3

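/* Pack the winsys-level radeon_bo_metadata into the kernel's tiling_info bitfield
 * (GFX9+ swizzle/DCC fields, or the legacy array-mode/bank parameters) and attach
 * it to the BO; get_metadata below does the reverse.
 */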
static void
radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   struct amdgpu_bo_metadata metadata = {0};
   uint64_t tiling_flags = 0;

   if (ws->info.chip_class >= GFX9) {
      tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
      tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, md->u.gfx9.dcc_offset_256b);
      tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, md->u.gfx9.dcc_pitch_max);
      tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, md->u.gfx9.dcc_independent_64b_blocks);
      tiling_flags |=
         AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, md->u.gfx9.dcc_independent_128b_blocks);
      tiling_flags |=
         AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, md->u.gfx9.dcc_max_compressed_block_size);
      tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
   } else {
      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
      else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
      else
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

      tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
      tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
      tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
      if (md->u.legacy.tile_split)
         tiling_flags |=
            AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
      tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
      tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks) - 1);

      if (md->u.legacy.scanout)
         tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
      else
         tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
   }

   metadata.tiling_info = tiling_flags;
   metadata.size_metadata = md->size_metadata;
   memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

   amdgpu_bo_set_metadata(bo->bo, &metadata);
}

static void
radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   struct amdgpu_bo_info info = {0};

   int r = amdgpu_bo_query_info(bo->bo, &info);
   if (r)
      return;

   uint64_t tiling_flags = info.metadata.tiling_info;

   if (ws->info.chip_class >= GFX9) {
      md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
      md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
   } else {
      md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
      md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;

      if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
         md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
      else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
         md->u.legacy.microtile = RADEON_LAYOUT_TILED;

      md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
      md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
      md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
      md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
      md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
      md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
      md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
   }

   md->size_metadata = info.metadata.size_metadata;
   memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

static VkResult
radv_amdgpu_winsys_bo_make_resident(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
                                    bool resident)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   VkResult result = VK_SUCCESS;

   /* Do not add the BO to the global list if it's a local BO because the
    * kernel maintains a list for us.
    */
   if (bo->base.is_local)
      return VK_SUCCESS;

   /* Do not add the BO twice to the global list if the allbos debug
    * option is enabled.
    */
   if (ws->debug_all_bos)
      return VK_SUCCESS;

   if (resident) {
      result = radv_amdgpu_global_bo_list_add(ws, bo);
   } else {
      radv_amdgpu_global_bo_list_del(ws, bo);
   }

   return result;
}

static int
radv_amdgpu_bo_va_compare(const void *a, const void *b)
{
   const struct radv_amdgpu_winsys_bo *bo_a = *(const struct radv_amdgpu_winsys_bo *const *)a;
   const struct radv_amdgpu_winsys_bo *bo_b = *(const struct radv_amdgpu_winsys_bo *const *)b;
   return bo_a->base.va < bo_b->base.va ? -1 : bo_a->base.va > bo_b->base.va ? 1 : 0;
}

static void
radv_amdgpu_dump_bo_log(struct radeon_winsys *_ws, FILE *file)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo_log *bo_log;

   if (!ws->debug_log_bos)
      return;

   u_rwlock_rdlock(&ws->log_bo_list_lock);
   LIST_FOR_EACH_ENTRY (bo_log, &ws->log_bo_list, list) {
      fprintf(file, "timestamp=%llu, VA=%.16llx-%.16llx, destroyed=%d, is_virtual=%d\n",
              (long long)bo_log->timestamp, (long long)bo_log->va,
              (long long)(bo_log->va + bo_log->size), bo_log->destroyed, bo_log->is_virtual);
   }
   u_rwlock_rdunlock(&ws->log_bo_list_lock);
}

static void
radv_amdgpu_dump_bo_ranges(struct radeon_winsys *_ws, FILE *file)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   if (ws->debug_all_bos) {
      struct radv_amdgpu_winsys_bo **bos = NULL;
      int i = 0;

      u_rwlock_rdlock(&ws->global_bo_list.lock);
      bos = malloc(sizeof(*bos) * ws->global_bo_list.count);
      if (!bos) {
         u_rwlock_rdunlock(&ws->global_bo_list.lock);
         fprintf(file, "  Failed to allocate memory to sort VA ranges for dumping\n");
         return;
      }

      for (i = 0; i < ws->global_bo_list.count; i++) {
         bos[i] = ws->global_bo_list.bos[i];
      }
      qsort(bos, ws->global_bo_list.count, sizeof(bos[0]), radv_amdgpu_bo_va_compare);

      for (i = 0; i < ws->global_bo_list.count; ++i) {
         fprintf(file, "  VA=%.16llx-%.16llx, handle=%d%s\n", (long long)bos[i]->base.va,
                 (long long)(bos[i]->base.va + bos[i]->size), bos[i]->bo_handle,
                 bos[i]->is_virtual ? " sparse" : "");
      }
      free(bos);
      u_rwlock_rdunlock(&ws->global_bo_list.lock);
   } else
      fprintf(file, "  To get BO VA ranges, please specify RADV_DEBUG=allbos\n");
}

void
radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
   ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
   ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
   ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
   ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
   ws->base.buffer_from_ptr = radv_amdgpu_winsys_bo_from_ptr;
   ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
   ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
   ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
   ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
   ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
   ws->base.buffer_get_flags_from_fd = radv_amdgpu_bo_get_flags_from_fd;
   ws->base.buffer_make_resident = radv_amdgpu_winsys_bo_make_resident;
   ws->base.dump_bo_ranges = radv_amdgpu_dump_bo_ranges;
   ws->base.dump_bo_log = radv_amdgpu_dump_bo_log;
}