/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based on amdgpu winsys.
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdio.h>

#include "radv_amdgpu_bo.h"
#include "radv_debug.h"

#include <amdgpu.h>
#include <inttypes.h>
#include <pthread.h>
#include <unistd.h>
#include "drm-uapi/amdgpu_drm.h"

#include "util/os_time.h"
#include "util/u_atomic.h"
#include "util/u_math.h"
#include "util/u_memory.h"

static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo);

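/* Wrapper around amdgpu_bo_va_op_raw() that translates RADV BO flags into the
 * AMDGPU_VM_PAGE_* flags expected by the kernel. When a buffer handle is given,
 * read/execute access is always granted, plus write access unless the BO is
 * read-only; for NULL handles (PRT/sparse mappings) the caller-provided
 * internal flags are used as-is. The size is page-aligned as the VA op requires.
 */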
static int
radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, amdgpu_bo_handle bo, uint64_t offset,
                     uint64_t size, uint64_t addr, uint32_t bo_flags, uint64_t internal_flags,
                     uint32_t ops)
{
   uint64_t flags = internal_flags;
   if (bo) {
      flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_EXECUTABLE;

      if ((bo_flags & RADEON_FLAG_VA_UNCACHED) && ws->info.chip_class >= GFX9)
         flags |= AMDGPU_VM_MTYPE_UC;

      if (!(bo_flags & RADEON_FLAG_READ_ONLY))
         flags |= AMDGPU_VM_PAGE_WRITEABLE;
   }

   size = align64(size, getpagesize());

   return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr, flags, ops);
}

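/* Map one range of a virtual (sparse) BO into the parent's VA space. Ranges
 * without a backing BO become PRT mappings when the kernel supports sparse VM
 * mappings; ranges with a backing BO take a reference on it for the lifetime
 * of the mapping.
 */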
static void
radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo,
                               const struct radv_amdgpu_map_range *range)
{
   uint64_t internal_flags = 0;
   assert(range->size);

   if (!range->bo) {
      if (!ws->info.has_sparse_vm_mappings)
         return;

      internal_flags |= AMDGPU_VM_PAGE_PRT;
   } else
      p_atomic_inc(&range->bo->ref_count);

   int r = radv_amdgpu_bo_va_op(ws, range->bo ? range->bo->bo : NULL, range->bo_offset, range->size,
                                range->offset + bo->base.va, 0, internal_flags, AMDGPU_VA_OP_MAP);
   if (r)
      abort();
}

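/* Unmap one range of a virtual (sparse) BO and drop the reference that the
 * corresponding map operation took on the backing BO, if any.
 */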
static void
radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo,
                                 const struct radv_amdgpu_map_range *range)
{
   uint64_t internal_flags = 0;
   assert(range->size);

   if (!range->bo) {
      if (!ws->info.has_sparse_vm_mappings)
         return;

      /* Even though this is an unmap, if we don't set this flag,
       * AMDGPU is going to complain about the missing buffer. */
      internal_flags |= AMDGPU_VM_PAGE_PRT;
   }

   int r = radv_amdgpu_bo_va_op(ws, range->bo ? range->bo->bo : NULL, range->bo_offset, range->size,
                                range->offset + bo->base.va, 0, internal_flags, AMDGPU_VA_OP_UNMAP);
   if (r)
      abort();

   if (range->bo)
      ws->base.buffer_destroy(&ws->base, (struct radeon_winsys_bo *)range->bo);
}

static int
bo_comparator(const void *ap, const void *bp)
{
   struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
   struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
   return (a > b) ? 1 : (a < b) ? -1 : 0;
}

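/* Rebuild the deduplicated list of backing BOs referenced by a virtual BO's
 * ranges: grow the array as needed, sort by pointer and collapse adjacent
 * duplicates, so the submission code can reference each backing BO exactly once.
 */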
static VkResult
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
   if (bo->bo_capacity < bo->range_count) {
      uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
      struct radv_amdgpu_winsys_bo **bos =
         realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
      if (!bos)
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      bo->bos = bos;
      bo->bo_capacity = new_count;
   }

   uint32_t temp_bo_count = 0;
   for (uint32_t i = 0; i < bo->range_count; ++i)
      if (bo->ranges[i].bo)
         bo->bos[temp_bo_count++] = bo->ranges[i].bo;

   if (!temp_bo_count) {
      bo->bo_count = 0;
      return VK_SUCCESS;
   }

   qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);

   uint32_t final_bo_count = 1;
   for (uint32_t i = 1; i < temp_bo_count; ++i)
      if (bo->bos[i] != bo->bos[i - 1])
         bo->bos[final_bo_count++] = bo->bos[i];

   bo->bo_count = final_bo_count;

   return VK_SUCCESS;
}

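/* Bind (or unbind, when _bo is NULL) a buffer into [offset, offset + size) of a
 * virtual BO. The parent's sorted range list is updated in place: ranges fully
 * covered by the new range are unmapped, the first/last ranges are split or
 * merged with the new range as needed, and the backing-BO list is rebuilt.
 */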
static VkResult
radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_winsys_bo *_parent,
                                   uint64_t offset, uint64_t size, struct radeon_winsys_bo *_bo,
                                   uint64_t bo_offset)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
   struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
   int range_count_delta, new_idx;
   int first = 0, last;
   struct radv_amdgpu_map_range new_first, new_last;
   VkResult result;

   assert(parent->is_virtual);
   assert(!bo || !bo->is_virtual);

   /* We have at most 2 new ranges (1 by the bind, and another one by splitting a range that
    * contains the newly bound range). */
   if (parent->range_capacity - parent->range_count < 2) {
      uint32_t range_capacity = parent->range_capacity + 2;
      struct radv_amdgpu_map_range *ranges =
         realloc(parent->ranges, range_capacity * sizeof(struct radv_amdgpu_map_range));
      if (!ranges)
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      parent->ranges = ranges;
      parent->range_capacity = range_capacity;
   }

   /*
    * [first, last] is exactly the range of ranges that either overlap the
    * newly bound range, or are adjacent to it. This corresponds to the bind
    * ranges that may change.
    */
   while (first + 1 < parent->range_count &&
          parent->ranges[first].offset + parent->ranges[first].size < offset)
      ++first;

   last = first;
   while (last + 1 < parent->range_count && parent->ranges[last + 1].offset <= offset + size)
      ++last;

   /* Whether the first or last range are going to be totally removed or just
    * resized/left alone. Note that in the case of first == last, we will split
    * this into a part before and after the new range. The remove flag is then
    * whether to not create the corresponding split part. */
   bool remove_first = parent->ranges[first].offset == offset;
   bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
   bool unmapped_first = false;

   assert(parent->ranges[first].offset <= offset);
   assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);

   /* Try to merge the new range with the first range. */
   if (parent->ranges[first].bo == bo &&
       (!bo ||
        offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
      size += offset - parent->ranges[first].offset;
      offset = parent->ranges[first].offset;
      bo_offset = parent->ranges[first].bo_offset;
      remove_first = true;
   }

   /* Try to merge the new range with the last range. */
   if (parent->ranges[last].bo == bo &&
       (!bo ||
        offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
      size = parent->ranges[last].offset + parent->ranges[last].size - offset;
      remove_last = true;
   }

   range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
   new_idx = first + !remove_first;

   /* Any range between first and last is going to be entirely covered by the new range so just
    * unmap them. */
   for (int i = first + 1; i < last; ++i)
      radv_amdgpu_winsys_virtual_unmap(ws, parent, parent->ranges + i);

   /* If the first/last range are not left alone we unmap them and optionally map
    * them again after modifications. Note that this implicitly can do the splitting
    * if first == last. */
   new_first = parent->ranges[first];
   new_last = parent->ranges[last];

   if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
      radv_amdgpu_winsys_virtual_unmap(ws, parent, parent->ranges + first);
      unmapped_first = true;

      if (!remove_first) {
         new_first.size = offset - new_first.offset;
         radv_amdgpu_winsys_virtual_map(ws, parent, &new_first);
      }
   }

   if (parent->ranges[last].offset < offset + size || remove_last) {
      if (first != last || !unmapped_first)
         radv_amdgpu_winsys_virtual_unmap(ws, parent, parent->ranges + last);

      if (!remove_last) {
         new_last.size -= offset + size - new_last.offset;
         new_last.bo_offset += (offset + size - new_last.offset);
         new_last.offset = offset + size;
         radv_amdgpu_winsys_virtual_map(ws, parent, &new_last);
      }
   }

   /* Moves the range list after last to account for the changed number of ranges. */
   memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
           sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));

   if (!remove_first)
      parent->ranges[first] = new_first;

   if (!remove_last)
      parent->ranges[new_idx + 1] = new_last;

   /* Actually set up the new range. */
   parent->ranges[new_idx].offset = offset;
   parent->ranges[new_idx].size = size;
   parent->ranges[new_idx].bo = bo;
   parent->ranges[new_idx].bo_offset = bo_offset;

   radv_amdgpu_winsys_virtual_map(ws, parent, parent->ranges + new_idx);

   parent->range_count += range_count_delta;

   result = radv_amdgpu_winsys_rebuild_bo_list(parent);
   if (result != VK_SUCCESS)
      return result;

   return VK_SUCCESS;
}

struct radv_amdgpu_winsys_bo_log {
   struct list_head list;
   uint64_t va;
   uint64_t size;
   uint64_t timestamp; /* CPU timestamp */
   uint8_t is_virtual : 1;
   uint8_t destroyed : 1;
};

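/* Record a BO creation or destruction event in the winsys BO log. Only active
 * when BO logging is enabled (ws->debug_log_bos); the accumulated entries are
 * printed by radv_amdgpu_dump_bo_log().
 */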
static void
radv_amdgpu_log_bo(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo, bool destroyed)
{
   struct radv_amdgpu_winsys_bo_log *bo_log = NULL;

   if (!ws->debug_log_bos)
      return;

   bo_log = malloc(sizeof(*bo_log));
   if (!bo_log)
      return;

   bo_log->va = bo->base.va;
   bo_log->size = bo->size;
   bo_log->timestamp = os_time_get_nano();
   bo_log->is_virtual = bo->is_virtual;
   bo_log->destroyed = destroyed;

   u_rwlock_wrlock(&ws->log_bo_list_lock);
   list_addtail(&bo_log->list, &ws->log_bo_list);
   u_rwlock_wrunlock(&ws->log_bo_list_lock);
}

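/* Add a BO to the winsys-wide global BO list, used when RADV_DEBUG=allbos is
 * set and for BOs explicitly made resident. The backing array grows
 * geometrically under the list's writer lock.
 */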
static VkResult
radv_amdgpu_global_bo_list_add(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo)
{
   u_rwlock_wrlock(&ws->global_bo_list.lock);
   if (ws->global_bo_list.count == ws->global_bo_list.capacity) {
      unsigned capacity = MAX2(4, ws->global_bo_list.capacity * 2);
      void *data =
         realloc(ws->global_bo_list.bos, capacity * sizeof(struct radv_amdgpu_winsys_bo *));
      if (!data) {
         u_rwlock_wrunlock(&ws->global_bo_list.lock);
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      }

      ws->global_bo_list.bos = (struct radv_amdgpu_winsys_bo **)data;
      ws->global_bo_list.capacity = capacity;
   }

   ws->global_bo_list.bos[ws->global_bo_list.count++] = bo;
   bo->base.use_global_list = true;
   u_rwlock_wrunlock(&ws->global_bo_list.lock);
   return VK_SUCCESS;
}

static void
radv_amdgpu_global_bo_list_del(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo)
{
   u_rwlock_wrlock(&ws->global_bo_list.lock);
   for (unsigned i = ws->global_bo_list.count; i-- > 0;) {
      if (ws->global_bo_list.bos[i] == bo) {
         ws->global_bo_list.bos[i] = ws->global_bo_list.bos[ws->global_bo_list.count - 1];
         --ws->global_bo_list.count;
         bo->base.use_global_list = false;
         break;
      }
   }
   u_rwlock_wrunlock(&ws->global_bo_list.lock);
}

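/* Drop one reference to a BO and, once the last reference is gone, unmap its
 * VA, release the kernel buffer (or the sparse range bookkeeping for virtual
 * BOs), update the VRAM/GTT accounting and free the VA range.
 */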
static void
radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);

   if (p_atomic_dec_return(&bo->ref_count))
      return;

   radv_amdgpu_log_bo(ws, bo, true);

   if (bo->is_virtual) {
      for (uint32_t i = 0; i < bo->range_count; ++i) {
         radv_amdgpu_winsys_virtual_unmap(ws, bo, bo->ranges + i);
      }
      free(bo->bos);
      free(bo->ranges);
   } else {
      if (ws->debug_all_bos)
         radv_amdgpu_global_bo_list_del(ws, bo);
      radv_amdgpu_bo_va_op(ws, bo->bo, 0, bo->size, bo->base.va, 0, 0, AMDGPU_VA_OP_UNMAP);
      amdgpu_bo_free(bo->bo);
   }

   if (bo->base.initial_domain & RADEON_DOMAIN_VRAM) {
      if (bo->base.vram_no_cpu_access) {
         p_atomic_add(&ws->allocated_vram, -align64(bo->size, ws->info.gart_page_size));
      } else {
         p_atomic_add(&ws->allocated_vram_vis, -align64(bo->size, ws->info.gart_page_size));
      }
   }

   if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt, -align64(bo->size, ws->info.gart_page_size));

   amdgpu_va_range_free(bo->va_handle);
   FREE(bo);
}

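/* Allocate a new winsys BO: reserve a VA range (optionally at a replayed
 * address for capture/replay), then either set up the range bookkeeping for a
 * virtual (sparse) BO or allocate and map a real kernel buffer with the
 * requested domains and flags, updating the memory-usage counters.
 */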
static VkResult
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned alignment,
                             enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags,
                             unsigned priority, uint64_t replay_address,
                             struct radeon_winsys_bo **out_bo)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo;
   struct amdgpu_bo_alloc_request request = {0};
   struct radv_amdgpu_map_range *ranges = NULL;
   amdgpu_bo_handle buf_handle;
   uint64_t va = 0;
   amdgpu_va_handle va_handle;
   int r;
   VkResult result = VK_SUCCESS;

   /* Just be robust for callers that might use NULL-ness for determining if things should be freed.
    */
   *out_bo = NULL;

   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo) {
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   unsigned virt_alignment = alignment;
   if (size >= ws->info.pte_fragment_size)
      virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);

   assert(!replay_address || (flags & RADEON_FLAG_REPLAYABLE));

   const uint64_t va_flags = AMDGPU_VA_RANGE_HIGH |
                             (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
                             (flags & RADEON_FLAG_REPLAYABLE ? AMDGPU_VA_RANGE_REPLAYABLE : 0);
   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, virt_alignment,
                             replay_address, &va, &va_handle, va_flags);
   if (r) {
      result =
         replay_address ? VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS : VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto error_va_alloc;
   }

   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->size = size;
   bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
   bo->ref_count = 1;

   if (flags & RADEON_FLAG_VIRTUAL) {
      ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
      if (!ranges) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto error_ranges_alloc;
      }

      bo->ranges = ranges;
      bo->range_count = 1;
      bo->range_capacity = 1;

      bo->ranges[0].offset = 0;
      bo->ranges[0].size = size;
      bo->ranges[0].bo = NULL;
      bo->ranges[0].bo_offset = 0;

      radv_amdgpu_winsys_virtual_map(ws, bo, bo->ranges);
      radv_amdgpu_log_bo(ws, bo, false);

      *out_bo = (struct radeon_winsys_bo *)bo;
      return VK_SUCCESS;
   }

   request.alloc_size = size;
   request.phys_alignment = alignment;

   if (initial_domain & RADEON_DOMAIN_VRAM) {
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;

      /* Since VRAM and GTT have almost the same performance on
       * APUs, we could just set GTT. However, in order to decrease
       * GTT(RAM) usage, which is shared with the OS, allow VRAM
       * placements too. The idea is not to use VRAM usefully, but
       * to use it so that it's not unused and wasted.
       *
       * Furthermore, even on discrete GPUs this is beneficial. If
       * both GTT and VRAM are set then AMDGPU still prefers VRAM
       * for the initial placement, but it makes the buffers
       * spillable. Otherwise AMDGPU tries to place the buffers in
       * VRAM really hard to the extent that we are getting a lot
       * of unnecessary movement. This helps significantly when
       * e.g. Horizon Zero Dawn allocates more memory than we have
       * VRAM.
       */
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
   }

   if (initial_domain & RADEON_DOMAIN_GTT)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
   if (initial_domain & RADEON_DOMAIN_GDS)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
   if (initial_domain & RADEON_DOMAIN_OA)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;

   if (flags & RADEON_FLAG_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS) {
      bo->base.vram_no_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
      request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
   }
   if (flags & RADEON_FLAG_GTT_WC)
      request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
   if (!(flags & RADEON_FLAG_IMPLICIT_SYNC))
      request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
   if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
       ((ws->perftest & RADV_PERFTEST_LOCAL_BOS) || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
      bo->base.is_local = true;
      request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
   }

   /* this won't do anything on pre 4.9 kernels */
   if (initial_domain & RADEON_DOMAIN_VRAM) {
      if (ws->zero_all_vram_allocs || (flags & RADEON_FLAG_ZERO_VRAM))
         request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
   }

   r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
   if (r) {
      fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
      fprintf(stderr, "amdgpu: size      : %" PRIu64 " bytes\n", size);
      fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment);
      fprintf(stderr, "amdgpu: domains   : %u\n", initial_domain);
      result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto error_bo_alloc;
   }

   r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags, 0, AMDGPU_VA_OP_MAP);
   if (r) {
      result = VK_ERROR_UNKNOWN;
      goto error_va_map;
   }

   bo->bo = buf_handle;
   bo->base.initial_domain = initial_domain;
   bo->base.use_global_list = bo->base.is_local;
   bo->is_shared = false;
   bo->priority = priority;

   r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   if (initial_domain & RADEON_DOMAIN_VRAM) {
      /* Buffers allocated in VRAM with the NO_CPU_ACCESS flag
       * aren't mappable and they are counted as part of the VRAM
       * counter.
       *
       * Otherwise, buffers with the CPU_ACCESS flag or with neither
       * flag set (imported buffers) are counted as part of the VRAM
       * visible counter because they can be mapped.
       */
      if (bo->base.vram_no_cpu_access) {
         p_atomic_add(&ws->allocated_vram, align64(bo->size, ws->info.gart_page_size));
      } else {
         p_atomic_add(&ws->allocated_vram_vis, align64(bo->size, ws->info.gart_page_size));
      }
   }

   if (initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt, align64(bo->size, ws->info.gart_page_size));

   if (ws->debug_all_bos)
      radv_amdgpu_global_bo_list_add(ws, bo);
   radv_amdgpu_log_bo(ws, bo, false);

   *out_bo = (struct radeon_winsys_bo *)bo;
   return VK_SUCCESS;
error_va_map:
   amdgpu_bo_free(buf_handle);

error_bo_alloc:
   free(ranges);

error_ranges_alloc:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   FREE(bo);
   return result;
}

static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   int ret;
   void *data;
   ret = amdgpu_bo_cpu_map(bo->bo, &data);
   if (ret)
      return NULL;
   return data;
}

static void
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   amdgpu_bo_cpu_unmap(bo->bo);
}

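/* Pick a VA alignment that lets the kernel map the BO with as few page-table
 * fragments as possible: at least the PTE fragment size for large buffers and,
 * on GFX9+, the largest power of two not exceeding the size.
 */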
static uint64_t
radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws, uint64_t size,
                                     unsigned alignment)
{
   uint64_t vm_alignment = alignment;

   /* Increase the VM alignment for faster address translation. */
   if (size >= ws->info.pte_fragment_size)
      vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);

   /* Gfx9: Increase the VM alignment to the most significant bit set
    * in the size for faster address translation.
    */
   if (ws->info.chip_class >= GFX9) {
      unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
      uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;

      vm_alignment = MAX2(vm_alignment, msb_alignment);
   }
   return vm_alignment;
}

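/* Wrap an existing CPU allocation (userptr) as a winsys BO: the pages are
 * registered with the kernel, a GTT VA range is reserved with the optimal
 * alignment and the buffer is mapped into it.
 */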
static VkResult
radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws, void *pointer, uint64_t size,
                               unsigned priority, struct radeon_winsys_bo **out_bo)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   amdgpu_bo_handle buf_handle;
   struct radv_amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;
   uint64_t vm_alignment;
   VkResult result = VK_SUCCESS;

   /* Just be robust for callers that might use NULL-ness for determining if things should be freed.
    */
   *out_bo = NULL;

   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle)) {
      result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto error;
   }

   /* Using the optimal VM alignment also fixes GPU hangs for buffers that
    * are imported.
    */
   vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size, ws->info.gart_page_size);

   if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, vm_alignment, 0, &va,
                             &va_handle, AMDGPU_VA_RANGE_HIGH)) {
      result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto error_va_alloc;
   }

   if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP)) {
      result = VK_ERROR_UNKNOWN;
      goto error_va_map;
   }

   /* Initialize it */
   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->size = size;
   bo->ref_count = 1;
   bo->bo = buf_handle;
   bo->base.initial_domain = RADEON_DOMAIN_GTT;
   bo->base.use_global_list = false;
   bo->priority = priority;

   ASSERTED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   p_atomic_add(&ws->allocated_gtt, align64(bo->size, ws->info.gart_page_size));

   if (ws->debug_all_bos)
      radv_amdgpu_global_bo_list_add(ws, bo);
   radv_amdgpu_log_bo(ws, bo, false);

   *out_bo = (struct radeon_winsys_bo *)bo;
   return VK_SUCCESS;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error:
   FREE(bo);
   return result;
}

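/* Import a dma-buf file descriptor as a winsys BO: the handle is imported,
 * queried for its size and preferred heaps, mapped into a fresh VA range and
 * accounted against the matching VRAM/GTT counters.
 */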
static VkResult
radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, int fd, unsigned priority,
                              struct radeon_winsys_bo **out_bo, uint64_t *alloc_size)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;
   enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
   struct amdgpu_bo_import_result result = {0};
   struct amdgpu_bo_info info = {0};
   enum radeon_bo_domain initial = 0;
   int r;
   VkResult vk_result = VK_SUCCESS;

   /* Just be robust for callers that might use NULL-ness for determining if things should be freed.
    */
   *out_bo = NULL;

   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   r = amdgpu_bo_import(ws->dev, type, fd, &result);
   if (r) {
      vk_result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
      goto error;
   }

   r = amdgpu_bo_query_info(result.buf_handle, &info);
   if (r) {
      vk_result = VK_ERROR_UNKNOWN;
      goto error_query;
   }

   if (alloc_size) {
      *alloc_size = info.alloc_size;
   }

   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, result.alloc_size, 1 << 20, 0,
                             &va, &va_handle, AMDGPU_VA_RANGE_HIGH);
   if (r) {
      vk_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto error_query;
   }

   r =
      radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size, va, 0, 0, AMDGPU_VA_OP_MAP);
   if (r) {
      vk_result = VK_ERROR_UNKNOWN;
      goto error_va_map;
   }

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      initial |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      initial |= RADEON_DOMAIN_GTT;

   bo->bo = result.buf_handle;
   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->base.initial_domain = initial;
   bo->base.use_global_list = false;
   bo->size = result.alloc_size;
   bo->is_shared = true;
   bo->priority = priority;
   bo->ref_count = 1;

   r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   if (bo->base.initial_domain & RADEON_DOMAIN_VRAM)
      p_atomic_add(&ws->allocated_vram, align64(bo->size, ws->info.gart_page_size));
   if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt, align64(bo->size, ws->info.gart_page_size));

   if (ws->debug_all_bos)
      radv_amdgpu_global_bo_list_add(ws, bo);
   radv_amdgpu_log_bo(ws, bo, false);

   *out_bo = (struct radeon_winsys_bo *)bo;
   return VK_SUCCESS;
error_va_map:
   amdgpu_va_range_free(va_handle);

error_query:
   amdgpu_bo_free(result.buf_handle);

error:
   FREE(bo);
   return vk_result;
}

static bool
radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo, int *fd)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
   int r;
   unsigned handle;
   r = amdgpu_bo_export(bo->bo, type, &handle);
   if (r)
      return false;

   *fd = (int)handle;
   bo->is_shared = true;
   return true;
}

static bool
radv_amdgpu_bo_get_flags_from_fd(struct radeon_winsys *_ws, int fd, enum radeon_bo_domain *domains,
                                 enum radeon_bo_flag *flags)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct amdgpu_bo_import_result result = {0};
   struct amdgpu_bo_info info = {0};
   int r;

   *domains = 0;
   *flags = 0;

   r = amdgpu_bo_import(ws->dev, amdgpu_bo_handle_type_dma_buf_fd, fd, &result);
   if (r)
      return false;

   r = amdgpu_bo_query_info(result.buf_handle, &info);
   amdgpu_bo_free(result.buf_handle);
   if (r)
      return false;

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      *domains |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      *domains |= RADEON_DOMAIN_GTT;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GDS)
      *domains |= RADEON_DOMAIN_GDS;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_OA)
      *domains |= RADEON_DOMAIN_OA;

   if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
      *flags |= RADEON_FLAG_CPU_ACCESS;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
      *flags |= RADEON_FLAG_NO_CPU_ACCESS;
   if (!(info.alloc_flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
      *flags |= RADEON_FLAG_IMPLICIT_SYNC;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
      *flags |= RADEON_FLAG_GTT_WC;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
      *flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_PREFER_LOCAL_BO;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
      *flags |= RADEON_FLAG_ZERO_VRAM;
   return true;
}

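/* Helpers for the legacy (pre-GFX9) tiling metadata: convert between the 0-6
 * TILE_SPLIT field encoding and the tile split size in bytes.
 */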
static unsigned
eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0:
      tile_split = 64;
      break;
   case 1:
      tile_split = 128;
      break;
   case 2:
      tile_split = 256;
      break;
   case 3:
      tile_split = 512;
      break;
   default:
   case 4:
      tile_split = 1024;
      break;
   case 5:
      tile_split = 2048;
      break;
   case 6:
      tile_split = 4096;
      break;
   }
   return tile_split;
}

static unsigned
radv_eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64:
      return 0;
   case 128:
      return 1;
   case 256:
      return 2;
   case 512:
      return 3;
   default:
   case 1024:
      return 4;
   case 2048:
      return 5;
   case 4096:
      return 6;
   }
}

#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45
#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK  0x3

static void
radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   struct amdgpu_bo_metadata metadata = {0};
   uint64_t tiling_flags = 0;

   if (ws->info.chip_class >= GFX9) {
      tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
      tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, md->u.gfx9.dcc_offset_256b);
      tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, md->u.gfx9.dcc_pitch_max);
      tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, md->u.gfx9.dcc_independent_64b_blocks);
      tiling_flags |=
         AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, md->u.gfx9.dcc_independent_128b_blocks);
      tiling_flags |=
         AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, md->u.gfx9.dcc_max_compressed_block_size);
      tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
   } else {
      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
      else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
      else
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

      tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
      tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
      tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
      if (md->u.legacy.tile_split)
         tiling_flags |=
            AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
      tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
      tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks) - 1);

      if (md->u.legacy.scanout)
         tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
      else
         tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
   }

   metadata.tiling_info = tiling_flags;
   metadata.size_metadata = md->size_metadata;
   memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

   amdgpu_bo_set_metadata(bo->bo, &metadata);
}

static void
radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   struct amdgpu_bo_info info = {0};

   int r = amdgpu_bo_query_info(bo->bo, &info);
   if (r)
      return;

   uint64_t tiling_flags = info.metadata.tiling_info;

   if (ws->info.chip_class >= GFX9) {
      md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
      md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
   } else {
      md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
      md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;

      if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
         md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
      else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
         md->u.legacy.microtile = RADEON_LAYOUT_TILED;

      md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
      md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
      md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
      md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
      md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
      md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
      md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
   }

   md->size_metadata = info.metadata.size_metadata;
   memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

static VkResult
radv_amdgpu_winsys_bo_make_resident(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
                                    bool resident)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   VkResult result = VK_SUCCESS;

   /* Do not add the BO to the global list if it's a local BO because the
    * kernel maintains a list for us.
    */
   if (bo->base.is_local)
      return VK_SUCCESS;

   /* Do not add the BO twice to the global list if the allbos debug
    * option is enabled.
    */
   if (ws->debug_all_bos)
      return VK_SUCCESS;

   if (resident) {
      result = radv_amdgpu_global_bo_list_add(ws, bo);
   } else {
      radv_amdgpu_global_bo_list_del(ws, bo);
   }

   return result;
}

static int
radv_amdgpu_bo_va_compare(const void *a, const void *b)
{
   const struct radv_amdgpu_winsys_bo *bo_a = *(const struct radv_amdgpu_winsys_bo *const *)a;
   const struct radv_amdgpu_winsys_bo *bo_b = *(const struct radv_amdgpu_winsys_bo *const *)b;
   return bo_a->base.va < bo_b->base.va ? -1 : bo_a->base.va > bo_b->base.va ? 1 : 0;
}

static void
radv_amdgpu_dump_bo_log(struct radeon_winsys *_ws, FILE *file)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo_log *bo_log;

   if (!ws->debug_log_bos)
      return;

   u_rwlock_rdlock(&ws->log_bo_list_lock);
   LIST_FOR_EACH_ENTRY (bo_log, &ws->log_bo_list, list) {
      fprintf(file, "timestamp=%llu, VA=%.16llx-%.16llx, destroyed=%d, is_virtual=%d\n",
              (long long)bo_log->timestamp, (long long)bo_log->va,
              (long long)(bo_log->va + bo_log->size), bo_log->destroyed, bo_log->is_virtual);
   }
   u_rwlock_rdunlock(&ws->log_bo_list_lock);
}

static void
radv_amdgpu_dump_bo_ranges(struct radeon_winsys *_ws, FILE *file)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   if (ws->debug_all_bos) {
      struct radv_amdgpu_winsys_bo **bos = NULL;
      int i = 0;

      u_rwlock_rdlock(&ws->global_bo_list.lock);
      bos = malloc(sizeof(*bos) * ws->global_bo_list.count);
      if (!bos) {
         u_rwlock_rdunlock(&ws->global_bo_list.lock);
         fprintf(file, " Failed to allocate memory to sort VA ranges for dumping\n");
         return;
      }

      for (i = 0; i < ws->global_bo_list.count; i++) {
         bos[i] = ws->global_bo_list.bos[i];
      }
      qsort(bos, ws->global_bo_list.count, sizeof(bos[0]), radv_amdgpu_bo_va_compare);

      for (i = 0; i < ws->global_bo_list.count; ++i) {
         fprintf(file, " VA=%.16llx-%.16llx, handle=%d%s\n", (long long)bos[i]->base.va,
                 (long long)(bos[i]->base.va + bos[i]->size), bos[i]->bo_handle,
                 bos[i]->is_virtual ? " sparse" : "");
      }
      free(bos);
      u_rwlock_rdunlock(&ws->global_bo_list.lock);
   } else
      fprintf(file, " To get BO VA ranges, please specify RADV_DEBUG=allbos\n");
}
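
/* Install the buffer-object entry points into the winsys function table. */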
void
radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
   ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
   ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
   ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
   ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
   ws->base.buffer_from_ptr = radv_amdgpu_winsys_bo_from_ptr;
   ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
   ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
   ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
   ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
   ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
   ws->base.buffer_get_flags_from_fd = radv_amdgpu_bo_get_flags_from_fd;
   ws->base.buffer_make_resident = radv_amdgpu_winsys_bo_make_resident;
   ws->base.dump_bo_ranges = radv_amdgpu_dump_bo_ranges;
   ws->base.dump_bo_log = radv_amdgpu_dump_bo_log;
}