/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based on amdgpu winsys.
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdio.h>

#include "radv_amdgpu_bo.h"

#include <amdgpu.h>
#include "drm-uapi/amdgpu_drm.h"
#include <inttypes.h>
#include <pthread.h>
#include <unistd.h>

#include "util/u_atomic.h"
#include "util/u_memory.h"
#include "util/u_math.h"

static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo);

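/* Thin wrapper around amdgpu_bo_va_op_raw(): translate the winsys
 * RADEON_FLAG_* bits into AMDGPU_VM_PAGE_* flags (read/exec always, write
 * unless READ_ONLY, MTYPE_UC for uncached buffers on GFX9+), page-align the
 * size and issue the requested VA map/unmap operation. When no BO is given,
 * the caller-provided internal_flags (e.g. AMDGPU_VM_PAGE_PRT) are used
 * as-is.
 */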
static int
radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws,
		     amdgpu_bo_handle bo,
		     uint64_t offset,
		     uint64_t size,
		     uint64_t addr,
		     uint32_t bo_flags,
		     uint64_t internal_flags,
		     uint32_t ops)
{
	uint64_t flags = internal_flags;
	if (bo) {
		flags = AMDGPU_VM_PAGE_READABLE |
			AMDGPU_VM_PAGE_EXECUTABLE;

		if ((bo_flags & RADEON_FLAG_VA_UNCACHED) &&
		    ws->info.chip_class >= GFX9)
			flags |= AMDGPU_VM_MTYPE_UC;

		if (!(bo_flags & RADEON_FLAG_READ_ONLY))
			flags |= AMDGPU_VM_PAGE_WRITEABLE;
	}

	size = align64(size, getpagesize());

	return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr,
				   flags, ops);
}

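/* Map a single range of a virtual (sparse) BO into the GPU VA space. A NULL
 * range->bo results in a PRT mapping (only if the kernel supports sparse VM
 * mappings); otherwise the backing BO is mapped and its reference count is
 * bumped so it stays alive while the range points at it.
 */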
static void
radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
			       const struct radv_amdgpu_map_range *range)
{
	uint64_t internal_flags = 0;
	assert(range->size);

	if (!range->bo) {
		if (!bo->ws->info.has_sparse_vm_mappings)
			return;

		internal_flags |= AMDGPU_VM_PAGE_PRT;
	} else
		p_atomic_inc(&range->bo->ref_count);

	int r = radv_amdgpu_bo_va_op(bo->ws, range->bo ? range->bo->bo : NULL,
				     range->bo_offset, range->size,
				     range->offset + bo->base.va, 0,
				     internal_flags, AMDGPU_VA_OP_MAP);
	if (r)
		abort();
}

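/* Counterpart of radv_amdgpu_winsys_virtual_map(): unmap the range and drop
 * the reference on its backing BO, if any.
 */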
static void
radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
				 const struct radv_amdgpu_map_range *range)
{
	uint64_t internal_flags = 0;
	assert(range->size);

	if (!range->bo) {
		if (!bo->ws->info.has_sparse_vm_mappings)
			return;

		/* Even though this is an unmap, if we don't set this flag,
		 * AMDGPU is going to complain about the missing buffer. */
		internal_flags |= AMDGPU_VM_PAGE_PRT;
	}

	int r = radv_amdgpu_bo_va_op(bo->ws, range->bo ? range->bo->bo : NULL,
				     range->bo_offset, range->size,
				     range->offset + bo->base.va, 0, internal_flags,
				     AMDGPU_VA_OP_UNMAP);
	if (r)
		abort();

	if (range->bo)
		radv_amdgpu_winsys_bo_destroy((struct radeon_winsys_bo *)range->bo);
}

static int bo_comparator(const void *ap, const void *bp) {
	struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
	struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
	return (a > b) ? 1 : (a < b) ? -1 : 0;
}

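/* Rebuild the sorted, deduplicated array of backing BOs referenced by the
 * ranges of a virtual BO, growing the array as needed, so the real BOs behind
 * a sparse BO can be enumerated elsewhere in the winsys.
 */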
static VkResult
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
	if (bo->bo_capacity < bo->range_count) {
		uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
		struct radv_amdgpu_winsys_bo **bos =
			realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
		if (!bos)
			return VK_ERROR_OUT_OF_HOST_MEMORY;
		bo->bos = bos;
		bo->bo_capacity = new_count;
	}

	uint32_t temp_bo_count = 0;
	for (uint32_t i = 0; i < bo->range_count; ++i)
		if (bo->ranges[i].bo)
			bo->bos[temp_bo_count++] = bo->ranges[i].bo;

	qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);

	/* Deduplicate; if no range has a backing BO, the count must be zero. */
	uint32_t final_bo_count = temp_bo_count ? 1 : 0;
	for (uint32_t i = 1; i < temp_bo_count; ++i)
		if (bo->bos[i] != bo->bos[i - 1])
			bo->bos[final_bo_count++] = bo->bos[i];

	bo->bo_count = final_bo_count;

	return VK_SUCCESS;
}

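/* Bind (or unbind, when _bo is NULL) a BO range into a virtual BO. The
 * algorithm finds the existing ranges that overlap or touch the new
 * [offset, offset + size) interval, merges the new range with compatible
 * neighbours, unmaps everything it fully covers, splits/trims the first and
 * last range as needed, re-maps the surviving pieces and finally rebuilds the
 * backing-BO list.
 *
 * Illustrative example only: binding B at [4K, 8K) into a 16K virtual BO that
 * currently maps A over [0, 16K) leaves three ranges:
 * A [0, 4K), B [4K, 8K), A [8K, 16K).
 */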
static VkResult
radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
				   uint64_t offset, uint64_t size,
				   struct radeon_winsys_bo *_bo, uint64_t bo_offset)
{
	struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
	struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
	int range_count_delta, new_idx;
	int first = 0, last;
	struct radv_amdgpu_map_range new_first, new_last;
	VkResult result;

	assert(parent->is_virtual);
	assert(!bo || !bo->is_virtual);

	/* We have at most 2 new ranges (1 by the bind, and another one by
	 * splitting a range that contains the newly bound range). */
	if (parent->range_capacity - parent->range_count < 2) {
		uint32_t range_capacity = parent->range_capacity + 2;
		struct radv_amdgpu_map_range *ranges =
			realloc(parent->ranges,
				range_capacity * sizeof(struct radv_amdgpu_map_range));
		if (!ranges)
			return VK_ERROR_OUT_OF_HOST_MEMORY;
		parent->ranges = ranges;
		parent->range_capacity = range_capacity;
	}

	/*
	 * [first, last] is exactly the range of ranges that either overlap the
	 * newly bound range, or are adjacent to it. This corresponds to the
	 * bind ranges that may change.
	 */
	while (first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
		++first;

	last = first;
	while (last + 1 < parent->range_count && parent->ranges[last + 1].offset <= offset + size)
		++last;

	/* Whether the first or last range is going to be totally removed or just
	 * resized/left alone. Note that in the case of first == last, we will split
	 * this into a part before and after the new range. The remove flag then
	 * says whether to skip creating the corresponding split part. */
	bool remove_first = parent->ranges[first].offset == offset;
	bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
	bool unmapped_first = false;

	assert(parent->ranges[first].offset <= offset);
	assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);

	/* Try to merge the new range with the first range. */
	if (parent->ranges[first].bo == bo && (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
		size += offset - parent->ranges[first].offset;
		offset = parent->ranges[first].offset;
		bo_offset = parent->ranges[first].bo_offset;
		remove_first = true;
	}

	/* Try to merge the new range with the last range. */
	if (parent->ranges[last].bo == bo && (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
		size = parent->ranges[last].offset + parent->ranges[last].size - offset;
		remove_last = true;
	}

	range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
	new_idx = first + !remove_first;

	/* Any range between first and last is going to be entirely covered by
	 * the new range, so just unmap them. */
	for (int i = first + 1; i < last; ++i)
		radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + i);

	/* If the first/last range is not left alone, we unmap it and optionally
	 * map it again after modifications. Note that this implicitly can do
	 * the splitting if first == last. */
	new_first = parent->ranges[first];
	new_last = parent->ranges[last];

	if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
		radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + first);
		unmapped_first = true;

		if (!remove_first) {
			new_first.size = offset - new_first.offset;
			radv_amdgpu_winsys_virtual_map(parent, &new_first);
		}
	}

	if (parent->ranges[last].offset < offset + size || remove_last) {
		if (first != last || !unmapped_first)
			radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + last);

		if (!remove_last) {
			new_last.size -= offset + size - new_last.offset;
			new_last.bo_offset += (offset + size - new_last.offset);
			new_last.offset = offset + size;
			radv_amdgpu_winsys_virtual_map(parent, &new_last);
		}
	}

	/* Move the range list after last to account for the changed number of ranges. */
	memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
		sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));

	if (!remove_first)
		parent->ranges[first] = new_first;

	if (!remove_last)
		parent->ranges[new_idx + 1] = new_last;

	/* Actually set up the new range. */
	parent->ranges[new_idx].offset = offset;
	parent->ranges[new_idx].size = size;
	parent->ranges[new_idx].bo = bo;
	parent->ranges[new_idx].bo_offset = bo_offset;

	radv_amdgpu_winsys_virtual_map(parent, parent->ranges + new_idx);

	parent->range_count += range_count_delta;

	result = radv_amdgpu_winsys_rebuild_bo_list(parent);
	if (result != VK_SUCCESS)
		return result;

	return VK_SUCCESS;
}

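/* Drop a reference on a BO and free it when the count hits zero. Virtual BOs
 * unmap all of their ranges (dropping the references taken in
 * radv_amdgpu_winsys_virtual_map()); real BOs are unmapped from the VA space
 * and released to the kernel. The VRAM/GTT accounting is updated accordingly.
 */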
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	struct radv_amdgpu_winsys *ws = bo->ws;

	if (p_atomic_dec_return(&bo->ref_count))
		return;
	if (bo->is_virtual) {
		for (uint32_t i = 0; i < bo->range_count; ++i) {
			radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
		}
		free(bo->bos);
		free(bo->ranges);
	} else {
		if (bo->ws->debug_all_bos) {
			u_rwlock_wrlock(&bo->ws->global_bo_list_lock);
			list_del(&bo->global_list_item);
			bo->ws->num_buffers--;
			u_rwlock_wrunlock(&bo->ws->global_bo_list_lock);
		}
		radv_amdgpu_bo_va_op(bo->ws, bo->bo, 0, bo->size, bo->base.va,
				     0, 0, AMDGPU_VA_OP_UNMAP);
		amdgpu_bo_free(bo->bo);
	}

	if (bo->initial_domain & RADEON_DOMAIN_VRAM) {
		if (bo->base.vram_no_cpu_access) {
			p_atomic_add(&ws->allocated_vram,
				     -align64(bo->size, ws->info.gart_page_size));
		} else {
			p_atomic_add(&ws->allocated_vram_vis,
				     -align64(bo->size, ws->info.gart_page_size));
		}
	}

	if (bo->initial_domain & RADEON_DOMAIN_GTT)
		p_atomic_add(&ws->allocated_gtt,
			     -align64(bo->size, ws->info.gart_page_size));

	amdgpu_va_range_free(bo->va_handle);
	FREE(bo);
}

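/* When debug_all_bos is set, keep every BO on a global list so the winsys can
 * walk all allocations (e.g. for debugging).
 */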
static void radv_amdgpu_add_buffer_to_global_list(struct radv_amdgpu_winsys_bo *bo)
{
	struct radv_amdgpu_winsys *ws = bo->ws;

	if (bo->ws->debug_all_bos) {
		u_rwlock_wrlock(&ws->global_bo_list_lock);
		list_addtail(&bo->global_list_item, &ws->global_bo_list);
		ws->num_buffers++;
		u_rwlock_wrunlock(&ws->global_bo_list_lock);
	}
}

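/* Allocate a new winsys BO. A GPU VA range is always reserved first; for
 * RADEON_FLAG_VIRTUAL the BO is just a single unbacked (PRT) range, otherwise
 * a kernel BO is allocated with heap/flags derived from the requested domain
 * and RADEON_FLAG_* bits, mapped at the reserved VA and accounted against the
 * VRAM/GTT counters.
 */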
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
			     uint64_t size,
			     unsigned alignment,
			     enum radeon_bo_domain initial_domain,
			     unsigned flags,
			     unsigned priority)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	struct radv_amdgpu_winsys_bo *bo;
	struct amdgpu_bo_alloc_request request = {0};
	struct radv_amdgpu_map_range *ranges = NULL;
	amdgpu_bo_handle buf_handle;
	uint64_t va = 0;
	amdgpu_va_handle va_handle;
	int r;
	bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
	if (!bo) {
		return NULL;
	}

	unsigned virt_alignment = alignment;
	if (size >= ws->info.pte_fragment_size)
		virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);

	r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
				  size, virt_alignment, 0, &va, &va_handle,
				  (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
				  AMDGPU_VA_RANGE_HIGH);
	if (r)
		goto error_va_alloc;

	bo->base.va = va;
	bo->va_handle = va_handle;
	bo->size = size;
	bo->ws = ws;
	bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
	bo->ref_count = 1;

	if (flags & RADEON_FLAG_VIRTUAL) {
		ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
		if (!ranges)
			goto error_ranges_alloc;

		bo->ranges = ranges;
		bo->range_count = 1;
		bo->range_capacity = 1;

		bo->ranges[0].offset = 0;
		bo->ranges[0].size = size;
		bo->ranges[0].bo = NULL;
		bo->ranges[0].bo_offset = 0;

		radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
		return (struct radeon_winsys_bo *)bo;
	}

	request.alloc_size = size;
	request.phys_alignment = alignment;

	if (initial_domain & RADEON_DOMAIN_VRAM) {
		request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;

		/* Since VRAM and GTT have almost the same performance on
		 * APUs, we could just set GTT. However, in order to decrease
		 * GTT (RAM) usage, which is shared with the OS, allow VRAM
		 * placements too. The idea is not that VRAM is more useful
		 * here, but simply that it would otherwise sit unused and
		 * wasted.
		 */
		if (!ws->info.has_dedicated_vram)
			request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
	}

	if (initial_domain & RADEON_DOMAIN_GTT)
		request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
	if (initial_domain & RADEON_DOMAIN_GDS)
		request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
	if (initial_domain & RADEON_DOMAIN_OA)
		request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;

	if (flags & RADEON_FLAG_CPU_ACCESS)
		request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	if (flags & RADEON_FLAG_NO_CPU_ACCESS) {
		bo->base.vram_no_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
		request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
	}
	if (flags & RADEON_FLAG_GTT_WC)
		request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22)
		request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
	if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
	    ws->info.has_local_buffers &&
	    (ws->use_local_bos || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
		bo->base.is_local = true;
		request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
	}

	/* This won't do anything on pre-4.9 kernels. */
	if (initial_domain & RADEON_DOMAIN_VRAM) {
		if (ws->zero_all_vram_allocs || (flags & RADEON_FLAG_ZERO_VRAM))
			request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
	}

	r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
	if (r) {
		fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
		fprintf(stderr, "amdgpu:    size      : %"PRIu64" bytes\n", size);
		fprintf(stderr, "amdgpu:    alignment : %u bytes\n", alignment);
		fprintf(stderr, "amdgpu:    domains   : %u\n", initial_domain);
		goto error_bo_alloc;
	}

	r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags, 0,
				 AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	bo->bo = buf_handle;
	bo->initial_domain = initial_domain;
	bo->is_shared = false;
	bo->priority = priority;

	r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
	assert(!r);

	if (initial_domain & RADEON_DOMAIN_VRAM) {
		/* Buffers allocated in VRAM with the NO_CPU_ACCESS flag
		 * aren't mappable and they are counted as part of the VRAM
		 * counter.
		 *
		 * Otherwise, buffers with the CPU_ACCESS flag or with neither
		 * flag (imported buffers) are counted as part of the VRAM
		 * visible counter because they can be mapped.
		 */
		if (bo->base.vram_no_cpu_access) {
			p_atomic_add(&ws->allocated_vram,
				     align64(bo->size, ws->info.gart_page_size));
		} else {
			p_atomic_add(&ws->allocated_vram_vis,
				     align64(bo->size, ws->info.gart_page_size));
		}
	}

	if (initial_domain & RADEON_DOMAIN_GTT)
		p_atomic_add(&ws->allocated_gtt,
			     align64(bo->size, ws->info.gart_page_size));

	radv_amdgpu_add_buffer_to_global_list(bo);
	return (struct radeon_winsys_bo *)bo;
error_va_map:
	amdgpu_bo_free(buf_handle);

error_bo_alloc:
	free(ranges);

error_ranges_alloc:
	amdgpu_va_range_free(va_handle);

error_va_alloc:
	FREE(bo);
	return NULL;
}

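/* CPU map/unmap: thin wrappers around amdgpu_bo_cpu_map() and
 * amdgpu_bo_cpu_unmap(). Mapping returns NULL on failure.
 */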
static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	int ret;
	void *data;
	ret = amdgpu_bo_cpu_map(bo->bo, &data);
	if (ret)
		return NULL;
	return data;
}

static void
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	amdgpu_bo_cpu_unmap(bo->bo);
}

static uint64_t
radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws,
				     uint64_t size, unsigned alignment)
{
	uint64_t vm_alignment = alignment;

	/* Increase the VM alignment for faster address translation. */
	if (size >= ws->info.pte_fragment_size)
		vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);

	/* Gfx9: Increase the VM alignment to the most significant bit set
	 * in the size for faster address translation.
	 */
	if (ws->info.chip_class >= GFX9) {
		unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
		uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;

		vm_alignment = MAX2(vm_alignment, msb_alignment);
	}
	return vm_alignment;
}

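/* Import an existing host allocation as a GTT BO via
 * amdgpu_create_bo_from_user_mem() and map it into the GPU VA space, using
 * the optimal VM alignment to avoid GPU hangs with imported buffers (see the
 * comment below).
 */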
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws,
			       void *pointer,
			       uint64_t size,
			       unsigned priority)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	amdgpu_bo_handle buf_handle;
	struct radv_amdgpu_winsys_bo *bo;
	uint64_t va;
	amdgpu_va_handle va_handle;
	uint64_t vm_alignment;

	bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
	if (!bo)
		return NULL;

	if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
		goto error;

	/* Using the optimal VM alignment also fixes GPU hangs for buffers that
	 * are imported.
	 */
	vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size,
							    ws->info.gart_page_size);

	if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
				  size, vm_alignment, 0, &va, &va_handle,
				  AMDGPU_VA_RANGE_HIGH))
		goto error_va_alloc;

	if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
		goto error_va_map;

	/* Initialize it */
	bo->base.va = va;
	bo->va_handle = va_handle;
	bo->size = size;
	bo->ref_count = 1;
	bo->ws = ws;
	bo->bo = buf_handle;
	bo->initial_domain = RADEON_DOMAIN_GTT;
	bo->priority = priority;

	ASSERTED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
	assert(!r);

	p_atomic_add(&ws->allocated_gtt,
		     align64(bo->size, ws->info.gart_page_size));

	radv_amdgpu_add_buffer_to_global_list(bo);
	return (struct radeon_winsys_bo *)bo;

error_va_map:
	amdgpu_va_range_free(va_handle);

error_va_alloc:
	amdgpu_bo_free(buf_handle);

error:
	FREE(bo);
	return NULL;
}

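/* Import a BO from a dma-buf file descriptor, query its size and preferred
 * heap, reserve a VA range and map it. The resulting BO is marked as shared.
 */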
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
			      int fd, unsigned priority,
			      uint64_t *alloc_size)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	struct radv_amdgpu_winsys_bo *bo;
	uint64_t va;
	amdgpu_va_handle va_handle;
	enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
	struct amdgpu_bo_import_result result = {0};
	struct amdgpu_bo_info info = {0};
	enum radeon_bo_domain initial = 0;
	int r;
	bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
	if (!bo)
		return NULL;

	r = amdgpu_bo_import(ws->dev, type, fd, &result);
	if (r)
		goto error;

	r = amdgpu_bo_query_info(result.buf_handle, &info);
	if (r)
		goto error_query;

	if (alloc_size) {
		*alloc_size = info.alloc_size;
	}

	r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
				  result.alloc_size, 1 << 20, 0, &va, &va_handle,
				  AMDGPU_VA_RANGE_HIGH);
	if (r)
		goto error_query;

	r = radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size,
				 va, 0, 0, AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
		initial |= RADEON_DOMAIN_VRAM;
	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
		initial |= RADEON_DOMAIN_GTT;

	bo->bo = result.buf_handle;
	bo->base.va = va;
	bo->va_handle = va_handle;
	bo->initial_domain = initial;
	bo->size = result.alloc_size;
	bo->is_shared = true;
	bo->ws = ws;
	bo->priority = priority;
	bo->ref_count = 1;

	r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
	assert(!r);

	if (bo->initial_domain & RADEON_DOMAIN_VRAM)
		p_atomic_add(&ws->allocated_vram,
			     align64(bo->size, ws->info.gart_page_size));
	if (bo->initial_domain & RADEON_DOMAIN_GTT)
		p_atomic_add(&ws->allocated_gtt,
			     align64(bo->size, ws->info.gart_page_size));

	radv_amdgpu_add_buffer_to_global_list(bo);
	return (struct radeon_winsys_bo *)bo;
error_va_map:
	amdgpu_va_range_free(va_handle);

error_query:
	amdgpu_bo_free(result.buf_handle);

error:
	FREE(bo);
	return NULL;
}

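/* Export a BO as a dma-buf file descriptor and mark it as shared. */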
static bool
radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws,
			  struct radeon_winsys_bo *_bo,
			  int *fd)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
	int r;
	unsigned handle;
	r = amdgpu_bo_export(bo->bo, type, &handle);
	if (r)
		return false;

	*fd = (int)handle;
	bo->is_shared = true;
	return true;
}

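/* Query the domains and allocation flags of a dma-buf without keeping it:
 * the BO is temporarily imported, its info queried and the handle released
 * again.
 */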
static bool
radv_amdgpu_bo_get_flags_from_fd(struct radeon_winsys *_ws, int fd,
				 enum radeon_bo_domain *domains,
				 enum radeon_bo_flag *flags)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	struct amdgpu_bo_import_result result = {0};
	struct amdgpu_bo_info info = {0};
	int r;

	*domains = 0;
	*flags = 0;

	r = amdgpu_bo_import(ws->dev, amdgpu_bo_handle_type_dma_buf_fd, fd, &result);
	if (r)
		return false;

	r = amdgpu_bo_query_info(result.buf_handle, &info);
	amdgpu_bo_free(result.buf_handle);
	if (r)
		return false;

	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
		*domains |= RADEON_DOMAIN_VRAM;
	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
		*domains |= RADEON_DOMAIN_GTT;
	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GDS)
		*domains |= RADEON_DOMAIN_GDS;
	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_OA)
		*domains |= RADEON_DOMAIN_OA;

	if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
		*flags |= RADEON_FLAG_CPU_ACCESS;
	if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
		*flags |= RADEON_FLAG_NO_CPU_ACCESS;
	if (!(info.alloc_flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
		*flags |= RADEON_FLAG_IMPLICIT_SYNC;
	if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
		*flags |= RADEON_FLAG_GTT_WC;
	if (info.alloc_flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
		*flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_PREFER_LOCAL_BO;
	if (info.alloc_flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
		*flags |= RADEON_FLAG_ZERO_VRAM;
	return true;
}

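/* Translate the TILE_SPLIT tiling-flag field to a split size in bytes, and
 * back (radv_eg_tile_split_rev below).
 */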
static unsigned eg_tile_split(unsigned tile_split)
{
	switch (tile_split) {
	case 0:     tile_split = 64;    break;
	case 1:     tile_split = 128;   break;
	case 2:     tile_split = 256;   break;
	case 3:     tile_split = 512;   break;
	default:
	case 4:     tile_split = 1024;  break;
	case 5:     tile_split = 2048;  break;
	case 6:     tile_split = 4096;  break;
	}
	return tile_split;
}

static unsigned radv_eg_tile_split_rev(unsigned eg_tile_split)
{
	switch (eg_tile_split) {
	case 64:    return 0;
	case 128:   return 1;
	case 256:   return 2;
	case 512:   return 3;
	default:
	case 1024:  return 4;
	case 2048:  return 5;
	case 4096:  return 6;
	}
}

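/* Pack the generic radeon_bo_metadata into AMDGPU tiling flags and attach it
 * to the kernel BO (GFX9+ uses the swizzle mode/scanout bits, older chips the
 * legacy tiling fields).
 */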
static void
radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys_bo *_bo,
				   struct radeon_bo_metadata *md)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	struct amdgpu_bo_metadata metadata = {0};
	uint64_t tiling_flags = 0;

	if (bo->ws->info.chip_class >= GFX9) {
		tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
		tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
	} else {
		if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
			tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
		else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
			tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
		else
			tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

		tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
		tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
		tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
		if (md->u.legacy.tile_split)
			tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
		tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
		tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks) - 1);

		if (md->u.legacy.scanout)
			tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
		else
			tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
	}

	metadata.tiling_info = tiling_flags;
	metadata.size_metadata = md->size_metadata;
	memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

	amdgpu_bo_set_metadata(bo->bo, &metadata);
}

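/* Inverse of radv_amdgpu_winsys_bo_set_metadata(): query the kernel BO and
 * unpack its tiling flags back into radeon_bo_metadata.
 */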
static void
radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys_bo *_bo,
				   struct radeon_bo_metadata *md)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	struct amdgpu_bo_info info = {0};

	int r = amdgpu_bo_query_info(bo->bo, &info);
	if (r)
		return;

	uint64_t tiling_flags = info.metadata.tiling_info;

	if (bo->ws->info.chip_class >= GFX9) {
		md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
		md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
	} else {
		md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
		md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;

		if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
			md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
		else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
			md->u.legacy.microtile = RADEON_LAYOUT_TILED;

		md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
		md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
		md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
		md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
		md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
		md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
		md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
	}

	md->size_metadata = info.metadata.size_metadata;
	memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

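/* Install the buffer entry points into the base winsys vtable. */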
void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
	ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
	ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
	ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
	ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
	ws->base.buffer_from_ptr = radv_amdgpu_winsys_bo_from_ptr;
	ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
	ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
	ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
	ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
	ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
	ws->base.buffer_get_flags_from_fd = radv_amdgpu_bo_get_flags_from_fd;
}