1 /*
2 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
3 * Copyright © 2015 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
16 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
18 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * The above copyright notice and this permission notice (including the
24 * next paragraph) shall be included in all copies or substantial portions
25 * of the Software.
26 */
27
28 #include "amdgpu_cs.h"
29
30 #include "util/hash_table.h"
31 #include "util/os_time.h"
32 #include "util/u_hash_table.h"
33 #include "frontend/drm_driver.h"
34 #include "drm-uapi/amdgpu_drm.h"
35 #include <xf86drm.h>
36 #include <stdio.h>
37 #include <inttypes.h>
38
39 #ifndef AMDGPU_VA_RANGE_HIGH
40 #define AMDGPU_VA_RANGE_HIGH 0x2
41 #endif
42
43 /* Set to 1 for verbose output showing committed sparse buffer ranges. */
44 #define DEBUG_SPARSE_COMMITS 0
45
46 struct amdgpu_sparse_backing_chunk {
47 uint32_t begin, end;
48 };
49
50 static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
51 enum radeon_bo_usage usage)
52 {
53 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
54 struct amdgpu_winsys *ws = bo->ws;
55 int64_t abs_timeout;
56
57 if (timeout == 0) {
58 if (p_atomic_read(&bo->num_active_ioctls))
59 return false;
60
61 } else {
62 abs_timeout = os_time_get_absolute_timeout(timeout);
63
64 /* Wait if any ioctl is being submitted with this buffer. */
65 if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
66 return false;
67 }
68
69 if (bo->is_shared) {
70 /* We can't use user fences for shared buffers, because user fences
71 * are local to this process only. If we want to wait for all buffer
72 * uses in all processes, we have to use amdgpu_bo_wait_for_idle.
73 */
74 bool buffer_busy = true;
75 int r;
76
77 r = amdgpu_bo_wait_for_idle(bo->bo, timeout, &buffer_busy);
78 if (r)
79 fprintf(stderr, "%s: amdgpu_bo_wait_for_idle failed %i\n", __func__,
80 r);
81 return !buffer_busy;
82 }
83
84 if (timeout == 0) {
85 unsigned idle_fences;
86 bool buffer_idle;
87
88 simple_mtx_lock(&ws->bo_fence_lock);
89
90 for (idle_fences = 0; idle_fences < bo->num_fences; ++idle_fences) {
91 if (!amdgpu_fence_wait(bo->fences[idle_fences], 0, false))
92 break;
93 }
94
95 /* Release the idle fences to avoid checking them again later. */
96 for (unsigned i = 0; i < idle_fences; ++i)
97 amdgpu_fence_reference(&bo->fences[i], NULL);
98
99 memmove(&bo->fences[0], &bo->fences[idle_fences],
100 (bo->num_fences - idle_fences) * sizeof(*bo->fences));
101 bo->num_fences -= idle_fences;
102
103 buffer_idle = !bo->num_fences;
104 simple_mtx_unlock(&ws->bo_fence_lock);
105
106 return buffer_idle;
107 } else {
108 bool buffer_idle = true;
109
110 simple_mtx_lock(&ws->bo_fence_lock);
111 while (bo->num_fences && buffer_idle) {
112 struct pipe_fence_handle *fence = NULL;
113 bool fence_idle = false;
114
115 amdgpu_fence_reference(&fence, bo->fences[0]);
116
117 /* Wait for the fence. */
118 simple_mtx_unlock(&ws->bo_fence_lock);
119 if (amdgpu_fence_wait(fence, abs_timeout, true))
120 fence_idle = true;
121 else
122 buffer_idle = false;
123 simple_mtx_lock(&ws->bo_fence_lock);
124
125 /* Release an idle fence to avoid checking it again later, keeping in
126 * mind that the fence array may have been modified by other threads.
127 */
128 if (fence_idle && bo->num_fences && bo->fences[0] == fence) {
129 amdgpu_fence_reference(&bo->fences[0], NULL);
130 memmove(&bo->fences[0], &bo->fences[1],
131 (bo->num_fences - 1) * sizeof(*bo->fences));
132 bo->num_fences--;
133 }
134
135 amdgpu_fence_reference(&fence, NULL);
136 }
137 simple_mtx_unlock(&ws->bo_fence_lock);
138
139 return buffer_idle;
140 }
141 }
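/* Illustrative usage sketch (not part of this file's logic; both patterns
 * appear in callers further down): a zero timeout makes the call a
 * non-blocking busyness poll, while a non-zero timeout blocks until the
 * tracked fences signal or the timeout expires.
 *
 *    // Poll: true if the buffer is idle right now.
 *    bool idle = amdgpu_bo_wait(&bo->base, 0, RADEON_USAGE_READWRITE);
 *
 *    // Block until all GPU writes to the buffer have completed.
 *    amdgpu_bo_wait(&bo->base, PIPE_TIMEOUT_INFINITE, RADEON_USAGE_WRITE);
 */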
142
143 static enum radeon_bo_domain amdgpu_bo_get_initial_domain(
144 struct pb_buffer *buf)
145 {
146 return ((struct amdgpu_winsys_bo*)buf)->initial_domain;
147 }
148
149 static enum radeon_bo_flag amdgpu_bo_get_flags(
150 struct pb_buffer *buf)
151 {
152 return ((struct amdgpu_winsys_bo*)buf)->flags;
153 }
154
155 static void amdgpu_bo_remove_fences(struct amdgpu_winsys_bo *bo)
156 {
157 for (unsigned i = 0; i < bo->num_fences; ++i)
158 amdgpu_fence_reference(&bo->fences[i], NULL);
159
160 FREE(bo->fences);
161 bo->num_fences = 0;
162 bo->max_fences = 0;
163 }
164
165 void amdgpu_bo_destroy(struct pb_buffer *_buf)
166 {
167 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
168 struct amdgpu_screen_winsys *sws_iter;
169 struct amdgpu_winsys *ws = bo->ws;
170
171 assert(bo->bo && "must not be called for slab entries");
172
173 if (!bo->is_user_ptr && bo->cpu_ptr) {
174 bo->cpu_ptr = NULL;
175 amdgpu_bo_unmap(&bo->base);
176 }
177 assert(bo->is_user_ptr || bo->u.real.map_count == 0);
178
179 if (ws->debug_all_bos) {
180 simple_mtx_lock(&ws->global_bo_list_lock);
181 list_del(&bo->u.real.global_list_item);
182 ws->num_buffers--;
183 simple_mtx_unlock(&ws->global_bo_list_lock);
184 }
185
186 /* Close all KMS handles retrieved for other DRM file descriptions */
187 simple_mtx_lock(&ws->sws_list_lock);
188 for (sws_iter = ws->sws_list; sws_iter; sws_iter = sws_iter->next) {
189 struct hash_entry *entry;
190
191 if (!sws_iter->kms_handles)
192 continue;
193
194 entry = _mesa_hash_table_search(sws_iter->kms_handles, bo);
195 if (entry) {
196 struct drm_gem_close args = { .handle = (uintptr_t)entry->data };
197
198 drmIoctl(sws_iter->fd, DRM_IOCTL_GEM_CLOSE, &args);
199 _mesa_hash_table_remove(sws_iter->kms_handles, entry);
200 }
201 }
202 simple_mtx_unlock(&ws->sws_list_lock);
203
204 simple_mtx_lock(&ws->bo_export_table_lock);
205 _mesa_hash_table_remove_key(ws->bo_export_table, bo->bo);
206 simple_mtx_unlock(&ws->bo_export_table_lock);
207
208 if (bo->initial_domain & RADEON_DOMAIN_VRAM_GTT) {
209 amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
210 amdgpu_va_range_free(bo->u.real.va_handle);
211 }
212 amdgpu_bo_free(bo->bo);
213
214 amdgpu_bo_remove_fences(bo);
215
216 if (bo->initial_domain & RADEON_DOMAIN_VRAM)
217 ws->allocated_vram -= align64(bo->base.size, ws->info.gart_page_size);
218 else if (bo->initial_domain & RADEON_DOMAIN_GTT)
219 ws->allocated_gtt -= align64(bo->base.size, ws->info.gart_page_size);
220
221 simple_mtx_destroy(&bo->lock);
222 FREE(bo);
223 }
224
225 static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf)
226 {
227 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
228
229 assert(bo->bo); /* slab buffers have a separate vtbl */
230
231 if (bo->u.real.use_reusable_pool)
232 pb_cache_add_buffer(&bo->u.real.cache_entry);
233 else
234 amdgpu_bo_destroy(_buf);
235 }
236
237 static void amdgpu_clean_up_buffer_managers(struct amdgpu_winsys *ws)
238 {
239 for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
240 pb_slabs_reclaim(&ws->bo_slabs[i]);
241 if (ws->info.has_tmz_support)
242 pb_slabs_reclaim(&ws->bo_slabs_encrypted[i]);
243 }
244
245 pb_cache_release_all_buffers(&ws->bo_cache);
246 }
247
248 static bool amdgpu_bo_do_map(struct amdgpu_winsys_bo *bo, void **cpu)
249 {
250 assert(!bo->sparse && bo->bo && !bo->is_user_ptr);
251 int r = amdgpu_bo_cpu_map(bo->bo, cpu);
252 if (r) {
253 /* Clean up buffer managers and try again. */
254 amdgpu_clean_up_buffer_managers(bo->ws);
255 r = amdgpu_bo_cpu_map(bo->bo, cpu);
256 if (r)
257 return false;
258 }
259
260 if (p_atomic_inc_return(&bo->u.real.map_count) == 1) {
261 if (bo->initial_domain & RADEON_DOMAIN_VRAM)
262 bo->ws->mapped_vram += bo->base.size;
263 else if (bo->initial_domain & RADEON_DOMAIN_GTT)
264 bo->ws->mapped_gtt += bo->base.size;
265 bo->ws->num_mapped_buffers++;
266 }
267
268 return true;
269 }
270
271 void *amdgpu_bo_map(struct pb_buffer *buf,
272 struct radeon_cmdbuf *rcs,
273 enum pipe_map_flags usage)
274 {
275 struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
276 struct amdgpu_winsys_bo *real;
277 struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs;
278
279 assert(!bo->sparse);
280
281 /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
282 if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
283 /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
284 if (usage & PIPE_MAP_DONTBLOCK) {
285 if (!(usage & PIPE_MAP_WRITE)) {
286 /* Mapping for read.
287 *
288 * Since we are mapping for read, we don't need to wait
289 * if the GPU is using the buffer for read too
290 * (neither one is changing it).
291 *
292 * Only check whether the buffer is being used for write. */
293 if (cs && amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
294 RADEON_USAGE_WRITE)) {
295 cs->flush_cs(cs->flush_data,
296 RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
297 return NULL;
298 }
299
300 if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
301 RADEON_USAGE_WRITE)) {
302 return NULL;
303 }
304 } else {
305 if (cs && amdgpu_bo_is_referenced_by_cs(cs, bo)) {
306 cs->flush_cs(cs->flush_data,
307 RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
308 return NULL;
309 }
310
311 if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
312 RADEON_USAGE_READWRITE)) {
313 return NULL;
314 }
315 }
316 } else {
317 uint64_t time = os_time_get_nano();
318
319 if (!(usage & PIPE_MAP_WRITE)) {
320 /* Mapping for read.
321 *
322 * Since we are mapping for read, we don't need to wait
323 * if the GPU is using the buffer for read too
324 * (neither one is changing it).
325 *
326 * Only check whether the buffer is being used for write. */
327 if (cs) {
328 if (amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
329 RADEON_USAGE_WRITE)) {
330 cs->flush_cs(cs->flush_data,
331 RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
332 } else {
333 /* Try to avoid busy-waiting in amdgpu_bo_wait. */
334 if (p_atomic_read(&bo->num_active_ioctls))
335 amdgpu_cs_sync_flush(rcs);
336 }
337 }
338
339 amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
340 RADEON_USAGE_WRITE);
341 } else {
342 /* Mapping for write. */
343 if (cs) {
344 if (amdgpu_bo_is_referenced_by_cs(cs, bo)) {
345 cs->flush_cs(cs->flush_data,
346 RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
347 } else {
348 /* Try to avoid busy-waiting in amdgpu_bo_wait. */
349 if (p_atomic_read(&bo->num_active_ioctls))
350 amdgpu_cs_sync_flush(rcs);
351 }
352 }
353
354 amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
355 RADEON_USAGE_READWRITE);
356 }
357
358 bo->ws->buffer_wait_time += os_time_get_nano() - time;
359 }
360 }
361
362 /* Buffer synchronization has been checked, now actually map the buffer. */
363 void *cpu = NULL;
364 uint64_t offset = 0;
365
366 if (bo->bo) {
367 real = bo;
368 } else {
369 real = bo->u.slab.real;
370 offset = bo->va - real->va;
371 }
372
373 if (usage & RADEON_MAP_TEMPORARY) {
374 if (real->is_user_ptr) {
375 cpu = real->cpu_ptr;
376 } else {
377 if (!amdgpu_bo_do_map(real, &cpu))
378 return NULL;
379 }
380 } else {
381 cpu = p_atomic_read(&real->cpu_ptr);
382 if (!cpu) {
383 simple_mtx_lock(&real->lock);
384 /* Must re-check due to the possibility of a race. Re-check need not
385 * be atomic thanks to the lock. */
386 cpu = real->cpu_ptr;
387 if (!cpu) {
388 if (!amdgpu_bo_do_map(real, &cpu)) {
389 simple_mtx_unlock(&real->lock);
390 return NULL;
391 }
392 p_atomic_set(&real->cpu_ptr, cpu);
393 }
394 simple_mtx_unlock(&real->lock);
395 }
396 }
397
398 return (uint8_t*)cpu + offset;
399 }
400
401 void amdgpu_bo_unmap(struct pb_buffer *buf)
402 {
403 struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
404 struct amdgpu_winsys_bo *real;
405
406 assert(!bo->sparse);
407
408 if (bo->is_user_ptr)
409 return;
410
411 real = bo->bo ? bo : bo->u.slab.real;
412 assert(real->u.real.map_count != 0 && "too many unmaps");
413 if (p_atomic_dec_zero(&real->u.real.map_count)) {
414 assert(!real->cpu_ptr &&
415 "too many unmaps or forgot RADEON_MAP_TEMPORARY flag");
416
417 if (real->initial_domain & RADEON_DOMAIN_VRAM)
418 real->ws->mapped_vram -= real->base.size;
419 else if (real->initial_domain & RADEON_DOMAIN_GTT)
420 real->ws->mapped_gtt -= real->base.size;
421 real->ws->num_mapped_buffers--;
422 }
423
424 amdgpu_bo_cpu_unmap(real->bo);
425 }
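/* Illustrative sketch of the map/unmap pairing (hypothetical caller; 'dst' and
 * 'size' are placeholders): RADEON_MAP_TEMPORARY requests a short-lived CPU
 * view that must be released with amdgpu_bo_unmap. Without
 * PIPE_MAP_UNSYNCHRONIZED the map first waits for the GPU as shown above.
 *
 *    void *ptr = amdgpu_bo_map(buf, NULL, PIPE_MAP_READ | RADEON_MAP_TEMPORARY);
 *    if (ptr) {
 *       memcpy(dst, ptr, size);
 *       amdgpu_bo_unmap(buf);
 *    }
 *
 * Without RADEON_MAP_TEMPORARY the pointer is cached in cpu_ptr and kept until
 * the buffer is destroyed, which is why the assertion above expects cpu_ptr to
 * be NULL once map_count reaches zero.
 */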
426
427 static const struct pb_vtbl amdgpu_winsys_bo_vtbl = {
428 amdgpu_bo_destroy_or_cache
429 /* other functions are never called */
430 };
431
432 static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo)
433 {
434 struct amdgpu_winsys *ws = bo->ws;
435
436 assert(bo->bo);
437
438 if (ws->debug_all_bos) {
439 simple_mtx_lock(&ws->global_bo_list_lock);
440 list_addtail(&bo->u.real.global_list_item, &ws->global_bo_list);
441 ws->num_buffers++;
442 simple_mtx_unlock(&ws->global_bo_list_lock);
443 }
444 }
445
446 static unsigned amdgpu_get_optimal_alignment(struct amdgpu_winsys *ws,
447 uint64_t size, unsigned alignment)
448 {
449 /* Increase the alignment for faster address translation and better memory
450 * access pattern.
451 */
452 if (size >= ws->info.pte_fragment_size) {
453 alignment = MAX2(alignment, ws->info.pte_fragment_size);
454 } else if (size) {
455 unsigned msb = util_last_bit(size);
456
457 alignment = MAX2(alignment, 1u << (msb - 1));
458 }
459 return alignment;
460 }
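/* Worked example (assuming a 64 KiB PTE fragment size purely for
 * illustration): a 1 MiB request has its alignment raised to 64 KiB, while a
 * 12 KiB request is smaller than the fragment size, so its alignment is raised
 * to the largest power of two not exceeding the size: util_last_bit(12288) is
 * 14, hence 1u << 13 == 8 KiB.
 */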
461
462 static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
463 uint64_t size,
464 unsigned alignment,
465 enum radeon_bo_domain initial_domain,
466 unsigned flags,
467 int heap)
468 {
469 struct amdgpu_bo_alloc_request request = {0};
470 amdgpu_bo_handle buf_handle;
471 uint64_t va = 0;
472 struct amdgpu_winsys_bo *bo;
473 amdgpu_va_handle va_handle = NULL;
474 int r;
475
476    /* Exactly one of VRAM, GTT, GDS or OA must be specified. */
477 assert(util_bitcount(initial_domain & (RADEON_DOMAIN_VRAM_GTT |
478 RADEON_DOMAIN_GDS |
479 RADEON_DOMAIN_OA)) == 1);
480
481 alignment = amdgpu_get_optimal_alignment(ws, size, alignment);
482
483 bo = CALLOC_STRUCT(amdgpu_winsys_bo);
484 if (!bo) {
485 return NULL;
486 }
487
488 if (heap >= 0) {
489 pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, &bo->base,
490 heap);
491 }
492 request.alloc_size = size;
493 request.phys_alignment = alignment;
494
495 if (initial_domain & RADEON_DOMAIN_VRAM) {
496 request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
497
498       /* Since VRAM and GTT have almost the same performance on APUs, we could
499        * just set GTT. However, to decrease GTT (system RAM) usage, which is
500        * shared with the OS, allow VRAM placements too. The goal isn't to make
501        * VRAM placements faster, but simply to keep VRAM from sitting unused
502        * and wasted.
         */
503 if (!ws->info.has_dedicated_vram)
504 request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
505 }
506
507 if (initial_domain & RADEON_DOMAIN_GTT)
508 request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
509 if (initial_domain & RADEON_DOMAIN_GDS)
510 request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
511 if (initial_domain & RADEON_DOMAIN_OA)
512 request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;
513
514 if (flags & RADEON_FLAG_NO_CPU_ACCESS)
515 request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
516 if (flags & RADEON_FLAG_GTT_WC)
517 request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
518 if (ws->zero_all_vram_allocs &&
519 (request.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM))
520 request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
521 if ((flags & RADEON_FLAG_ENCRYPTED) &&
522 ws->info.has_tmz_support) {
523 request.flags |= AMDGPU_GEM_CREATE_ENCRYPTED;
524
525 if (!(flags & RADEON_FLAG_DRIVER_INTERNAL)) {
526 struct amdgpu_screen_winsys *sws_iter;
527 simple_mtx_lock(&ws->sws_list_lock);
528 for (sws_iter = ws->sws_list; sws_iter; sws_iter = sws_iter->next) {
529 *((bool*) &sws_iter->base.uses_secure_bos) = true;
530 }
531 simple_mtx_unlock(&ws->sws_list_lock);
532 }
533 }
534
535 r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
536 if (r) {
537 fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
538 fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size);
539 fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment);
540 fprintf(stderr, "amdgpu: domains : %u\n", initial_domain);
541 fprintf(stderr, "amdgpu: flags : %" PRIx64 "\n", request.flags);
542 goto error_bo_alloc;
543 }
544
545 if (initial_domain & RADEON_DOMAIN_VRAM_GTT) {
546 unsigned va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
547
548 r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
549 size + va_gap_size, alignment,
550 0, &va, &va_handle,
551 (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
552 AMDGPU_VA_RANGE_HIGH);
553 if (r)
554 goto error_va_alloc;
555
556 unsigned vm_flags = AMDGPU_VM_PAGE_READABLE |
557 AMDGPU_VM_PAGE_EXECUTABLE;
558
559 if (!(flags & RADEON_FLAG_READ_ONLY))
560 vm_flags |= AMDGPU_VM_PAGE_WRITEABLE;
561
562 if (flags & RADEON_FLAG_UNCACHED)
563 vm_flags |= AMDGPU_VM_MTYPE_UC;
564
565 r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags,
566 AMDGPU_VA_OP_MAP);
567 if (r)
568 goto error_va_map;
569 }
570
571 simple_mtx_init(&bo->lock, mtx_plain);
572 pipe_reference_init(&bo->base.reference, 1);
573 bo->base.alignment = alignment;
574 bo->base.usage = 0;
575 bo->base.size = size;
576 bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
577 bo->ws = ws;
578 bo->bo = buf_handle;
579 bo->va = va;
580 bo->u.real.va_handle = va_handle;
581 bo->initial_domain = initial_domain;
582 bo->flags = flags;
583 bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
584
585 if (initial_domain & RADEON_DOMAIN_VRAM)
586 ws->allocated_vram += align64(size, ws->info.gart_page_size);
587 else if (initial_domain & RADEON_DOMAIN_GTT)
588 ws->allocated_gtt += align64(size, ws->info.gart_page_size);
589
590 amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms, &bo->u.real.kms_handle);
591
592 amdgpu_add_buffer_to_global_list(bo);
593
594 return bo;
595
596 error_va_map:
597 amdgpu_va_range_free(va_handle);
598
599 error_va_alloc:
600 amdgpu_bo_free(buf_handle);
601
602 error_bo_alloc:
603 FREE(bo);
604 return NULL;
605 }
606
607 bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf)
608 {
609 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
610
611 if (amdgpu_bo_is_referenced_by_any_cs(bo)) {
612 return false;
613 }
614
615 return amdgpu_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
616 }
617
618 bool amdgpu_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
619 {
620 struct amdgpu_winsys_bo *bo = NULL; /* fix container_of */
621 bo = container_of(entry, bo, u.slab.entry);
622
623 return amdgpu_bo_can_reclaim(&bo->base);
624 }
625
626 static struct pb_slabs *get_slabs(struct amdgpu_winsys *ws, uint64_t size,
627 enum radeon_bo_flag flags)
628 {
629 struct pb_slabs *bo_slabs = ((flags & RADEON_FLAG_ENCRYPTED) && ws->info.has_tmz_support) ?
630 ws->bo_slabs_encrypted : ws->bo_slabs;
631 /* Find the correct slab allocator for the given size. */
632 for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
633 struct pb_slabs *slabs = &bo_slabs[i];
634
635 if (size <= 1 << (slabs->min_order + slabs->num_orders - 1))
636 return slabs;
637 }
638
639 assert(0);
640 return NULL;
641 }
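/* Example with hypothetical parameters: if slabs[0] was created with
 * min_order = 4 and num_orders = 4, it serves entries of up to
 * 1 << (4 + 4 - 1) = 128 bytes; a 200-byte request falls through to the next
 * allocator, whose maximum entry size is larger.
 */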
642
643 static void amdgpu_bo_slab_destroy(struct pb_buffer *_buf)
644 {
645 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
646
647 assert(!bo->bo);
648
649 if (bo->flags & RADEON_FLAG_ENCRYPTED)
650 pb_slab_free(get_slabs(bo->ws,
651 bo->base.size,
652 RADEON_FLAG_ENCRYPTED), &bo->u.slab.entry);
653 else
654 pb_slab_free(get_slabs(bo->ws,
655 bo->base.size,
656 0), &bo->u.slab.entry);
657 }
658
659 static const struct pb_vtbl amdgpu_winsys_bo_slab_vtbl = {
660 amdgpu_bo_slab_destroy
661 /* other functions are never called */
662 };
663
664 static struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap,
665 unsigned entry_size,
666 unsigned group_index,
667 bool encrypted)
668 {
669 struct amdgpu_winsys *ws = priv;
670 struct amdgpu_slab *slab = CALLOC_STRUCT(amdgpu_slab);
671 enum radeon_bo_domain domains = radeon_domain_from_heap(heap);
672 enum radeon_bo_flag flags = radeon_flags_from_heap(heap);
673 uint32_t base_id;
674 unsigned slab_size = 0;
675
676 if (!slab)
677 return NULL;
678
679 if (encrypted)
680 flags |= RADEON_FLAG_ENCRYPTED;
681
682 struct pb_slabs *slabs = ((flags & RADEON_FLAG_ENCRYPTED) && ws->info.has_tmz_support) ?
683 ws->bo_slabs_encrypted : ws->bo_slabs;
684
685 /* Determine the slab buffer size. */
686 for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
687 unsigned max_entry_size = 1 << (slabs[i].min_order + slabs[i].num_orders - 1);
688
689 if (entry_size <= max_entry_size) {
690 /* The slab size is twice the size of the largest possible entry. */
691 slab_size = max_entry_size * 2;
692
693 /* The largest slab should have the same size as the PTE fragment
694 * size to get faster address translation.
695 */
696 if (i == NUM_SLAB_ALLOCATORS - 1 &&
697 slab_size < ws->info.pte_fragment_size)
698 slab_size = ws->info.pte_fragment_size;
699 break;
700 }
701 }
702 assert(slab_size != 0);
703
704 slab->buffer = amdgpu_winsys_bo(amdgpu_bo_create(ws,
705 slab_size, slab_size,
706 domains, flags));
707 if (!slab->buffer)
708 goto fail;
709
710 slab->base.num_entries = slab->buffer->base.size / entry_size;
711 slab->base.num_free = slab->base.num_entries;
712 slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
713 if (!slab->entries)
714 goto fail_buffer;
715
716 list_inithead(&slab->base.free);
717
718 base_id = __sync_fetch_and_add(&ws->next_bo_unique_id, slab->base.num_entries);
719
720 for (unsigned i = 0; i < slab->base.num_entries; ++i) {
721 struct amdgpu_winsys_bo *bo = &slab->entries[i];
722
723 simple_mtx_init(&bo->lock, mtx_plain);
724 bo->base.alignment = entry_size;
725 bo->base.usage = slab->buffer->base.usage;
726 bo->base.size = entry_size;
727 bo->base.vtbl = &amdgpu_winsys_bo_slab_vtbl;
728 bo->ws = ws;
729 bo->va = slab->buffer->va + i * entry_size;
730 bo->initial_domain = domains;
731 bo->unique_id = base_id + i;
732 bo->u.slab.entry.slab = &slab->base;
733 bo->u.slab.entry.group_index = group_index;
734
735 if (slab->buffer->bo) {
736 /* The slab is not suballocated. */
737 bo->u.slab.real = slab->buffer;
738 } else {
739 /* The slab is allocated out of a bigger slab. */
740 bo->u.slab.real = slab->buffer->u.slab.real;
741 assert(bo->u.slab.real->bo);
742 }
743
744 list_addtail(&bo->u.slab.entry.head, &slab->base.free);
745 }
746
747 return &slab->base;
748
749 fail_buffer:
750 amdgpu_winsys_bo_reference(&slab->buffer, NULL);
751 fail:
752 FREE(slab);
753 return NULL;
754 }
755
756 struct pb_slab *amdgpu_bo_slab_alloc_encrypted(void *priv, unsigned heap,
757 unsigned entry_size,
758 unsigned group_index)
759 {
760 return amdgpu_bo_slab_alloc(priv, heap, entry_size, group_index, true);
761 }
762
763 struct pb_slab *amdgpu_bo_slab_alloc_normal(void *priv, unsigned heap,
764 unsigned entry_size,
765 unsigned group_index)
766 {
767 return amdgpu_bo_slab_alloc(priv, heap, entry_size, group_index, false);
768 }
769
770 void amdgpu_bo_slab_free(void *priv, struct pb_slab *pslab)
771 {
772 struct amdgpu_slab *slab = amdgpu_slab(pslab);
773
774 for (unsigned i = 0; i < slab->base.num_entries; ++i) {
775 amdgpu_bo_remove_fences(&slab->entries[i]);
776 simple_mtx_destroy(&slab->entries[i].lock);
777 }
778
779 FREE(slab->entries);
780 amdgpu_winsys_bo_reference(&slab->buffer, NULL);
781 FREE(slab);
782 }
783
784 #if DEBUG_SPARSE_COMMITS
785 static void
786 sparse_dump(struct amdgpu_winsys_bo *bo, const char *func)
787 {
788 fprintf(stderr, "%s: %p (size=%"PRIu64", num_va_pages=%u) @ %s\n"
789 "Commitments:\n",
790 __func__, bo, bo->base.size, bo->u.sparse.num_va_pages, func);
791
792 struct amdgpu_sparse_backing *span_backing = NULL;
793 uint32_t span_first_backing_page = 0;
794 uint32_t span_first_va_page = 0;
795 uint32_t va_page = 0;
796
797 for (;;) {
798 struct amdgpu_sparse_backing *backing = 0;
799 uint32_t backing_page = 0;
800
801 if (va_page < bo->u.sparse.num_va_pages) {
802 backing = bo->u.sparse.commitments[va_page].backing;
803 backing_page = bo->u.sparse.commitments[va_page].page;
804 }
805
806 if (span_backing &&
807 (backing != span_backing ||
808 backing_page != span_first_backing_page + (va_page - span_first_va_page))) {
809 fprintf(stderr, " %u..%u: backing=%p:%u..%u\n",
810 span_first_va_page, va_page - 1, span_backing,
811 span_first_backing_page,
812 span_first_backing_page + (va_page - span_first_va_page) - 1);
813
814 span_backing = NULL;
815 }
816
817 if (va_page >= bo->u.sparse.num_va_pages)
818 break;
819
820 if (backing && !span_backing) {
821 span_backing = backing;
822 span_first_backing_page = backing_page;
823 span_first_va_page = va_page;
824 }
825
826 va_page++;
827 }
828
829 fprintf(stderr, "Backing:\n");
830
831 list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) {
832 fprintf(stderr, " %p (size=%"PRIu64")\n", backing, backing->bo->base.size);
833 for (unsigned i = 0; i < backing->num_chunks; ++i)
834 fprintf(stderr, " %u..%u\n", backing->chunks[i].begin, backing->chunks[i].end);
835 }
836 }
837 #endif
838
839 /*
840 * Attempt to allocate the given number of backing pages. Fewer pages may be
841 * allocated (depending on the fragmentation of existing backing buffers),
842 * which will be reflected by a change to *pnum_pages.
843 */
844 static struct amdgpu_sparse_backing *
845 sparse_backing_alloc(struct amdgpu_winsys_bo *bo, uint32_t *pstart_page, uint32_t *pnum_pages)
846 {
847 struct amdgpu_sparse_backing *best_backing;
848 unsigned best_idx;
849 uint32_t best_num_pages;
850
851 best_backing = NULL;
852 best_idx = 0;
853 best_num_pages = 0;
854
855 /* This is a very simple and inefficient best-fit algorithm. */
856 list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) {
857 for (unsigned idx = 0; idx < backing->num_chunks; ++idx) {
858 uint32_t cur_num_pages = backing->chunks[idx].end - backing->chunks[idx].begin;
859 if ((best_num_pages < *pnum_pages && cur_num_pages > best_num_pages) ||
860 (best_num_pages > *pnum_pages && cur_num_pages < best_num_pages)) {
861 best_backing = backing;
862 best_idx = idx;
863 best_num_pages = cur_num_pages;
864 }
865 }
866 }
867
868 /* Allocate a new backing buffer if necessary. */
869 if (!best_backing) {
870 struct pb_buffer *buf;
871 uint64_t size;
872 uint32_t pages;
873
874 best_backing = CALLOC_STRUCT(amdgpu_sparse_backing);
875 if (!best_backing)
876 return NULL;
877
878 best_backing->max_chunks = 4;
879 best_backing->chunks = CALLOC(best_backing->max_chunks,
880 sizeof(*best_backing->chunks));
881 if (!best_backing->chunks) {
882 FREE(best_backing);
883 return NULL;
884 }
885
886 assert(bo->u.sparse.num_backing_pages < DIV_ROUND_UP(bo->base.size, RADEON_SPARSE_PAGE_SIZE));
887
888 size = MIN3(bo->base.size / 16,
889 8 * 1024 * 1024,
890 bo->base.size - (uint64_t)bo->u.sparse.num_backing_pages * RADEON_SPARSE_PAGE_SIZE);
891 size = MAX2(size, RADEON_SPARSE_PAGE_SIZE);
892
893 buf = amdgpu_bo_create(bo->ws, size, RADEON_SPARSE_PAGE_SIZE,
894 bo->initial_domain,
895 bo->u.sparse.flags | RADEON_FLAG_NO_SUBALLOC);
896 if (!buf) {
897 FREE(best_backing->chunks);
898 FREE(best_backing);
899 return NULL;
900 }
901
902 /* We might have gotten a bigger buffer than requested via caching. */
903 pages = buf->size / RADEON_SPARSE_PAGE_SIZE;
904
905 best_backing->bo = amdgpu_winsys_bo(buf);
906 best_backing->num_chunks = 1;
907 best_backing->chunks[0].begin = 0;
908 best_backing->chunks[0].end = pages;
909
910 list_add(&best_backing->list, &bo->u.sparse.backing);
911 bo->u.sparse.num_backing_pages += pages;
912
913 best_idx = 0;
914 best_num_pages = pages;
915 }
916
917 *pnum_pages = MIN2(*pnum_pages, best_num_pages);
918 *pstart_page = best_backing->chunks[best_idx].begin;
919 best_backing->chunks[best_idx].begin += *pnum_pages;
920
921 if (best_backing->chunks[best_idx].begin >= best_backing->chunks[best_idx].end) {
922 memmove(&best_backing->chunks[best_idx], &best_backing->chunks[best_idx + 1],
923 sizeof(*best_backing->chunks) * (best_backing->num_chunks - best_idx - 1));
924 best_backing->num_chunks--;
925 }
926
927 return best_backing;
928 }
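/* Note (illustrative): the allocation may end up smaller than requested. If
 * the caller asks for 32 pages but the chunk chosen above only holds 20,
 * *pnum_pages is reduced to 20; amdgpu_bo_sparse_commit simply loops and calls
 * this function again for the remaining pages.
 */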
929
930 static void
931 sparse_free_backing_buffer(struct amdgpu_winsys_bo *bo,
932 struct amdgpu_sparse_backing *backing)
933 {
934 struct amdgpu_winsys *ws = backing->bo->ws;
935
936 bo->u.sparse.num_backing_pages -= backing->bo->base.size / RADEON_SPARSE_PAGE_SIZE;
937
938 simple_mtx_lock(&ws->bo_fence_lock);
939 amdgpu_add_fences(backing->bo, bo->num_fences, bo->fences);
940 simple_mtx_unlock(&ws->bo_fence_lock);
941
942 list_del(&backing->list);
943 amdgpu_winsys_bo_reference(&backing->bo, NULL);
944 FREE(backing->chunks);
945 FREE(backing);
946 }
947
948 /*
949 * Return a range of pages from the given backing buffer back into the
950 * free structure.
951 */
952 static bool
953 sparse_backing_free(struct amdgpu_winsys_bo *bo,
954 struct amdgpu_sparse_backing *backing,
955 uint32_t start_page, uint32_t num_pages)
956 {
957 uint32_t end_page = start_page + num_pages;
958 unsigned low = 0;
959 unsigned high = backing->num_chunks;
960
961 /* Find the first chunk with begin >= start_page. */
962 while (low < high) {
963 unsigned mid = low + (high - low) / 2;
964
965 if (backing->chunks[mid].begin >= start_page)
966 high = mid;
967 else
968 low = mid + 1;
969 }
970
971 assert(low >= backing->num_chunks || end_page <= backing->chunks[low].begin);
972 assert(low == 0 || backing->chunks[low - 1].end <= start_page);
973
974 if (low > 0 && backing->chunks[low - 1].end == start_page) {
975 backing->chunks[low - 1].end = end_page;
976
977 if (low < backing->num_chunks && end_page == backing->chunks[low].begin) {
978 backing->chunks[low - 1].end = backing->chunks[low].end;
979 memmove(&backing->chunks[low], &backing->chunks[low + 1],
980 sizeof(*backing->chunks) * (backing->num_chunks - low - 1));
981 backing->num_chunks--;
982 }
983 } else if (low < backing->num_chunks && end_page == backing->chunks[low].begin) {
984 backing->chunks[low].begin = start_page;
985 } else {
986 if (backing->num_chunks >= backing->max_chunks) {
987 unsigned new_max_chunks = 2 * backing->max_chunks;
988 struct amdgpu_sparse_backing_chunk *new_chunks =
989 REALLOC(backing->chunks,
990 sizeof(*backing->chunks) * backing->max_chunks,
991 sizeof(*backing->chunks) * new_max_chunks);
992 if (!new_chunks)
993 return false;
994
995 backing->max_chunks = new_max_chunks;
996 backing->chunks = new_chunks;
997 }
998
999 memmove(&backing->chunks[low + 1], &backing->chunks[low],
1000 sizeof(*backing->chunks) * (backing->num_chunks - low));
1001 backing->chunks[low].begin = start_page;
1002 backing->chunks[low].end = end_page;
1003 backing->num_chunks++;
1004 }
1005
1006 if (backing->num_chunks == 1 && backing->chunks[0].begin == 0 &&
1007 backing->chunks[0].end == backing->bo->base.size / RADEON_SPARSE_PAGE_SIZE)
1008 sparse_free_backing_buffer(bo, backing);
1009
1010 return true;
1011 }
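/* Worked example: with free chunks [0..4) and [8..12), freeing 4 pages
 * starting at page 4 (end_page = 8) first extends the left neighbour
 * (chunks[low - 1].end == start_page) and then merges with the right
 * neighbour, leaving a single free chunk [0..12).
 */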
1012
1013 static void amdgpu_bo_sparse_destroy(struct pb_buffer *_buf)
1014 {
1015 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
1016 int r;
1017
1018 assert(!bo->bo && bo->sparse);
1019
1020 r = amdgpu_bo_va_op_raw(bo->ws->dev, NULL, 0,
1021 (uint64_t)bo->u.sparse.num_va_pages * RADEON_SPARSE_PAGE_SIZE,
1022 bo->va, 0, AMDGPU_VA_OP_CLEAR);
1023 if (r) {
1024 fprintf(stderr, "amdgpu: clearing PRT VA region on destroy failed (%d)\n", r);
1025 }
1026
1027 while (!list_is_empty(&bo->u.sparse.backing)) {
1028 struct amdgpu_sparse_backing *dummy = NULL;
1029 sparse_free_backing_buffer(bo,
1030 container_of(bo->u.sparse.backing.next,
1031 dummy, list));
1032 }
1033
1034 amdgpu_va_range_free(bo->u.sparse.va_handle);
1035 FREE(bo->u.sparse.commitments);
1036 simple_mtx_destroy(&bo->lock);
1037 FREE(bo);
1038 }
1039
1040 static const struct pb_vtbl amdgpu_winsys_bo_sparse_vtbl = {
1041 amdgpu_bo_sparse_destroy
1042 /* other functions are never called */
1043 };
1044
1045 static struct pb_buffer *
1046 amdgpu_bo_sparse_create(struct amdgpu_winsys *ws, uint64_t size,
1047 enum radeon_bo_domain domain,
1048 enum radeon_bo_flag flags)
1049 {
1050 struct amdgpu_winsys_bo *bo;
1051 uint64_t map_size;
1052 uint64_t va_gap_size;
1053 int r;
1054
1055 /* We use 32-bit page numbers; refuse to attempt allocating sparse buffers
1056 * that exceed this limit. This is not really a restriction: we don't have
1057 * that much virtual address space anyway.
1058 */
1059 if (size > (uint64_t)INT32_MAX * RADEON_SPARSE_PAGE_SIZE)
1060 return NULL;
1061
1062 bo = CALLOC_STRUCT(amdgpu_winsys_bo);
1063 if (!bo)
1064 return NULL;
1065
1066 simple_mtx_init(&bo->lock, mtx_plain);
1067 pipe_reference_init(&bo->base.reference, 1);
1068 bo->base.alignment = RADEON_SPARSE_PAGE_SIZE;
1069 bo->base.size = size;
1070 bo->base.vtbl = &amdgpu_winsys_bo_sparse_vtbl;
1071 bo->ws = ws;
1072 bo->initial_domain = domain;
1073 bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
1074 bo->sparse = true;
1075 bo->u.sparse.flags = flags & ~RADEON_FLAG_SPARSE;
1076
1077 bo->u.sparse.num_va_pages = DIV_ROUND_UP(size, RADEON_SPARSE_PAGE_SIZE);
1078 bo->u.sparse.commitments = CALLOC(bo->u.sparse.num_va_pages,
1079 sizeof(*bo->u.sparse.commitments));
1080 if (!bo->u.sparse.commitments)
1081 goto error_alloc_commitments;
1082
1083 list_inithead(&bo->u.sparse.backing);
1084
1085 /* For simplicity, we always map a multiple of the page size. */
1086 map_size = align64(size, RADEON_SPARSE_PAGE_SIZE);
1087 va_gap_size = ws->check_vm ? 4 * RADEON_SPARSE_PAGE_SIZE : 0;
1088 r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
1089 map_size + va_gap_size, RADEON_SPARSE_PAGE_SIZE,
1090 0, &bo->va, &bo->u.sparse.va_handle,
1091 AMDGPU_VA_RANGE_HIGH);
1092 if (r)
1093 goto error_va_alloc;
1094
1095 r = amdgpu_bo_va_op_raw(bo->ws->dev, NULL, 0, size, bo->va,
1096 AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_MAP);
1097 if (r)
1098 goto error_va_map;
1099
1100 return &bo->base;
1101
1102 error_va_map:
1103 amdgpu_va_range_free(bo->u.sparse.va_handle);
1104 error_va_alloc:
1105 FREE(bo->u.sparse.commitments);
1106 error_alloc_commitments:
1107 simple_mtx_destroy(&bo->lock);
1108 FREE(bo);
1109 return NULL;
1110 }
1111
1112 static bool
1113 amdgpu_bo_sparse_commit(struct pb_buffer *buf, uint64_t offset, uint64_t size,
1114 bool commit)
1115 {
1116 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buf);
1117 struct amdgpu_sparse_commitment *comm;
1118 uint32_t va_page, end_va_page;
1119 bool ok = true;
1120 int r;
1121
1122 assert(bo->sparse);
1123 assert(offset % RADEON_SPARSE_PAGE_SIZE == 0);
1124 assert(offset <= bo->base.size);
1125 assert(size <= bo->base.size - offset);
1126 assert(size % RADEON_SPARSE_PAGE_SIZE == 0 || offset + size == bo->base.size);
1127
1128 comm = bo->u.sparse.commitments;
1129 va_page = offset / RADEON_SPARSE_PAGE_SIZE;
1130 end_va_page = va_page + DIV_ROUND_UP(size, RADEON_SPARSE_PAGE_SIZE);
1131
1132 simple_mtx_lock(&bo->lock);
1133
1134 #if DEBUG_SPARSE_COMMITS
1135 sparse_dump(bo, __func__);
1136 #endif
1137
1138 if (commit) {
1139 while (va_page < end_va_page) {
1140 uint32_t span_va_page;
1141
1142 /* Skip pages that are already committed. */
1143 if (comm[va_page].backing) {
1144 va_page++;
1145 continue;
1146 }
1147
1148 /* Determine length of uncommitted span. */
1149 span_va_page = va_page;
1150 while (va_page < end_va_page && !comm[va_page].backing)
1151 va_page++;
1152
1153 /* Fill the uncommitted span with chunks of backing memory. */
1154 while (span_va_page < va_page) {
1155 struct amdgpu_sparse_backing *backing;
1156 uint32_t backing_start, backing_size;
1157
1158 backing_size = va_page - span_va_page;
1159 backing = sparse_backing_alloc(bo, &backing_start, &backing_size);
1160 if (!backing) {
1161 ok = false;
1162 goto out;
1163 }
1164
1165 r = amdgpu_bo_va_op_raw(bo->ws->dev, backing->bo->bo,
1166 (uint64_t)backing_start * RADEON_SPARSE_PAGE_SIZE,
1167 (uint64_t)backing_size * RADEON_SPARSE_PAGE_SIZE,
1168 bo->va + (uint64_t)span_va_page * RADEON_SPARSE_PAGE_SIZE,
1169 AMDGPU_VM_PAGE_READABLE |
1170 AMDGPU_VM_PAGE_WRITEABLE |
1171 AMDGPU_VM_PAGE_EXECUTABLE,
1172 AMDGPU_VA_OP_REPLACE);
1173 if (r) {
1174 ok = sparse_backing_free(bo, backing, backing_start, backing_size);
1175 assert(ok && "sufficient memory should already be allocated");
1176
1177 ok = false;
1178 goto out;
1179 }
1180
1181 while (backing_size) {
1182 comm[span_va_page].backing = backing;
1183 comm[span_va_page].page = backing_start;
1184 span_va_page++;
1185 backing_start++;
1186 backing_size--;
1187 }
1188 }
1189 }
1190 } else {
1191 r = amdgpu_bo_va_op_raw(bo->ws->dev, NULL, 0,
1192 (uint64_t)(end_va_page - va_page) * RADEON_SPARSE_PAGE_SIZE,
1193 bo->va + (uint64_t)va_page * RADEON_SPARSE_PAGE_SIZE,
1194 AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_REPLACE);
1195 if (r) {
1196 ok = false;
1197 goto out;
1198 }
1199
1200 while (va_page < end_va_page) {
1201 struct amdgpu_sparse_backing *backing;
1202 uint32_t backing_start;
1203 uint32_t span_pages;
1204
1205 /* Skip pages that are already uncommitted. */
1206 if (!comm[va_page].backing) {
1207 va_page++;
1208 continue;
1209 }
1210
1211 /* Group contiguous spans of pages. */
1212 backing = comm[va_page].backing;
1213 backing_start = comm[va_page].page;
1214 comm[va_page].backing = NULL;
1215
1216 span_pages = 1;
1217 va_page++;
1218
1219 while (va_page < end_va_page &&
1220 comm[va_page].backing == backing &&
1221 comm[va_page].page == backing_start + span_pages) {
1222 comm[va_page].backing = NULL;
1223 va_page++;
1224 span_pages++;
1225 }
1226
1227 if (!sparse_backing_free(bo, backing, backing_start, span_pages)) {
1228 /* Couldn't allocate tracking data structures, so we have to leak */
1229 fprintf(stderr, "amdgpu: leaking PRT backing memory\n");
1230 ok = false;
1231 }
1232 }
1233 }
1234 out:
1235
1236 simple_mtx_unlock(&bo->lock);
1237
1238 return ok;
1239 }
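/* Illustrative use through the winsys vtable (buffer_commit is hooked up to
 * this function at the bottom of this file); 'sparse_buf' is a hypothetical
 * buffer created with RADEON_FLAG_SPARSE and 'ws' a struct
 * amdgpu_screen_winsys pointer:
 *
 *    ws->base.buffer_commit(sparse_buf, 0, 1024 * 1024, true);   // back 1 MiB
 *    ws->base.buffer_commit(sparse_buf, 0, 1024 * 1024, false);  // release it
 */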
1240
1241 static void amdgpu_buffer_get_metadata(struct pb_buffer *_buf,
1242 struct radeon_bo_metadata *md,
1243 struct radeon_surf *surf)
1244 {
1245 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
1246 struct amdgpu_bo_info info = {0};
1247 int r;
1248
1249 assert(bo->bo && "must not be called for slab entries");
1250
1251 r = amdgpu_bo_query_info(bo->bo, &info);
1252 if (r)
1253 return;
1254
1255 ac_surface_set_bo_metadata(&bo->ws->info, surf, info.metadata.tiling_info,
1256 &md->mode);
1257
1258 md->size_metadata = info.metadata.size_metadata;
1259 memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
1260 }
1261
1262 static void amdgpu_buffer_set_metadata(struct pb_buffer *_buf,
1263 struct radeon_bo_metadata *md,
1264 struct radeon_surf *surf)
1265 {
1266 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
1267 struct amdgpu_bo_metadata metadata = {0};
1268
1269 assert(bo->bo && "must not be called for slab entries");
1270
1271 ac_surface_get_bo_metadata(&bo->ws->info, surf, &metadata.tiling_info);
1272
1273 metadata.size_metadata = md->size_metadata;
1274 memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));
1275
1276 amdgpu_bo_set_metadata(bo->bo, &metadata);
1277 }
1278
1279 struct pb_buffer *
1280 amdgpu_bo_create(struct amdgpu_winsys *ws,
1281 uint64_t size,
1282 unsigned alignment,
1283 enum radeon_bo_domain domain,
1284 enum radeon_bo_flag flags)
1285 {
1286 struct amdgpu_winsys_bo *bo;
1287 int heap = -1;
1288
1289 if (domain & (RADEON_DOMAIN_GDS | RADEON_DOMAIN_OA))
1290 flags |= RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_SUBALLOC;
1291
1292 /* VRAM implies WC. This is not optional. */
1293 assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC);
1294
1295 /* NO_CPU_ACCESS is not valid with GTT. */
1296 assert(!(domain & RADEON_DOMAIN_GTT) || !(flags & RADEON_FLAG_NO_CPU_ACCESS));
1297
1298 /* Sparse buffers must have NO_CPU_ACCESS set. */
1299 assert(!(flags & RADEON_FLAG_SPARSE) || flags & RADEON_FLAG_NO_CPU_ACCESS);
1300
1301 struct pb_slabs *slabs = ((flags & RADEON_FLAG_ENCRYPTED) && ws->info.has_tmz_support) ?
1302 ws->bo_slabs_encrypted : ws->bo_slabs;
1303 struct pb_slabs *last_slab = &slabs[NUM_SLAB_ALLOCATORS - 1];
1304 unsigned max_slab_entry_size = 1 << (last_slab->min_order + last_slab->num_orders - 1);
1305
1306 /* Sub-allocate small buffers from slabs. */
1307 if (!(flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE)) &&
1308 size <= max_slab_entry_size &&
1309 /* The alignment must be at most the size of the smallest slab entry or
1310 * the next power of two. */
1311 alignment <= MAX2(1 << slabs[0].min_order, util_next_power_of_two(size))) {
1312 struct pb_slab_entry *entry;
1313 int heap = radeon_get_heap_index(domain, flags);
1314
1315 if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS)
1316 goto no_slab;
1317
1318 struct pb_slabs *slabs = get_slabs(ws, size, flags);
1319 entry = pb_slab_alloc(slabs, size, heap);
1320 if (!entry) {
1321 /* Clean up buffer managers and try again. */
1322 amdgpu_clean_up_buffer_managers(ws);
1323
1324 entry = pb_slab_alloc(slabs, size, heap);
1325 }
1326 if (!entry)
1327 return NULL;
1328
1329 bo = NULL;
1330 bo = container_of(entry, bo, u.slab.entry);
1331
1332 pipe_reference_init(&bo->base.reference, 1);
1333
1334 return &bo->base;
1335 }
1336 no_slab:
1337
1338 if (flags & RADEON_FLAG_SPARSE) {
1339 assert(RADEON_SPARSE_PAGE_SIZE % alignment == 0);
1340
1341 return amdgpu_bo_sparse_create(ws, size, domain, flags);
1342 }
1343
1344 /* This flag is irrelevant for the cache. */
1345 flags &= ~RADEON_FLAG_NO_SUBALLOC;
1346
1347 /* Align size to page size. This is the minimum alignment for normal
1348 * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
1349 * like constant/uniform buffers, can benefit from better and more reuse.
1350 */
1351 if (domain & RADEON_DOMAIN_VRAM_GTT) {
1352 size = align64(size, ws->info.gart_page_size);
1353 alignment = align(alignment, ws->info.gart_page_size);
1354 }
1355
1356 bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
1357
1358 if (use_reusable_pool) {
1359 heap = radeon_get_heap_index(domain, flags & ~RADEON_FLAG_ENCRYPTED);
1360 assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
1361
1362 /* Get a buffer from the cache. */
1363 bo = (struct amdgpu_winsys_bo*)
1364 pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, 0, heap);
1365 if (bo)
1366 return &bo->base;
1367 }
1368
1369 /* Create a new one. */
1370 bo = amdgpu_create_bo(ws, size, alignment, domain, flags, heap);
1371 if (!bo) {
1372 /* Clean up buffer managers and try again. */
1373 amdgpu_clean_up_buffer_managers(ws);
1374
1375 bo = amdgpu_create_bo(ws, size, alignment, domain, flags, heap);
1376 if (!bo)
1377 return NULL;
1378 }
1379
1380 bo->u.real.use_reusable_pool = use_reusable_pool;
1381 return &bo->base;
1382 }
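/* Illustrative calls (hypothetical sizes): a small sub-allocatable request,
 * e.g. a 4 KiB constant buffer with RADEON_FLAG_NO_INTERPROCESS_SHARING,
 * normally ends up in a slab above, whereas a large VRAM allocation goes
 * through the reusable pool / amdgpu_create_bo path:
 *
 *    struct pb_buffer *buf =
 *       amdgpu_bo_create(ws, 64 * 1024 * 1024, 4096, RADEON_DOMAIN_VRAM,
 *                        RADEON_FLAG_GTT_WC |
 *                        RADEON_FLAG_NO_INTERPROCESS_SHARING);
 */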
1383
1384 static struct pb_buffer *
1385 amdgpu_buffer_create(struct radeon_winsys *ws,
1386 uint64_t size,
1387 unsigned alignment,
1388 enum radeon_bo_domain domain,
1389 enum radeon_bo_flag flags)
1390 {
1391 struct pb_buffer * res = amdgpu_bo_create(amdgpu_winsys(ws), size, alignment, domain,
1392 flags);
1393 return res;
1394 }
1395
1396 static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
1397 struct winsys_handle *whandle,
1398 unsigned vm_alignment)
1399 {
1400 struct amdgpu_winsys *ws = amdgpu_winsys(rws);
1401 struct amdgpu_winsys_bo *bo = NULL;
1402 enum amdgpu_bo_handle_type type;
1403 struct amdgpu_bo_import_result result = {0};
1404 uint64_t va;
1405 amdgpu_va_handle va_handle = NULL;
1406 struct amdgpu_bo_info info = {0};
1407 enum radeon_bo_domain initial = 0;
1408 enum radeon_bo_flag flags = 0;
1409 int r;
1410
1411 switch (whandle->type) {
1412 case WINSYS_HANDLE_TYPE_SHARED:
1413 type = amdgpu_bo_handle_type_gem_flink_name;
1414 break;
1415 case WINSYS_HANDLE_TYPE_FD:
1416 type = amdgpu_bo_handle_type_dma_buf_fd;
1417 break;
1418 default:
1419 return NULL;
1420 }
1421
1422 r = amdgpu_bo_import(ws->dev, type, whandle->handle, &result);
1423 if (r)
1424 return NULL;
1425
1426 simple_mtx_lock(&ws->bo_export_table_lock);
1427 bo = util_hash_table_get(ws->bo_export_table, result.buf_handle);
1428
1429 /* If the amdgpu_winsys_bo instance already exists, bump the reference
1430 * counter and return it.
1431 */
1432 if (bo) {
1433 p_atomic_inc(&bo->base.reference.count);
1434 simple_mtx_unlock(&ws->bo_export_table_lock);
1435
1436 /* Release the buffer handle, because we don't need it anymore.
1437 * This function is returning an existing buffer, which has its own
1438 * handle.
1439 */
1440 amdgpu_bo_free(result.buf_handle);
1441 return &bo->base;
1442 }
1443
1444 /* Get initial domains. */
1445 r = amdgpu_bo_query_info(result.buf_handle, &info);
1446 if (r)
1447 goto error;
1448
1449 r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
1450 result.alloc_size,
1451 amdgpu_get_optimal_alignment(ws, result.alloc_size,
1452 vm_alignment),
1453 0, &va, &va_handle, AMDGPU_VA_RANGE_HIGH);
1454 if (r)
1455 goto error;
1456
1457 bo = CALLOC_STRUCT(amdgpu_winsys_bo);
1458 if (!bo)
1459 goto error;
1460
1461 r = amdgpu_bo_va_op(result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);
1462 if (r)
1463 goto error;
1464
1465 if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
1466 initial |= RADEON_DOMAIN_VRAM;
1467 if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
1468 initial |= RADEON_DOMAIN_GTT;
1469 if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
1470 flags |= RADEON_FLAG_NO_CPU_ACCESS;
1471 if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
1472 flags |= RADEON_FLAG_GTT_WC;
1473 if (info.alloc_flags & AMDGPU_GEM_CREATE_ENCRYPTED) {
1474 /* Imports are always possible even if the importer isn't using TMZ.
1475 * For instance libweston needs to import the buffer to be able to determine
1476 * if it can be used for scanout.
1477 */
1478 flags |= RADEON_FLAG_ENCRYPTED;
1479 }
1480
1481 /* Initialize the structure. */
1482 simple_mtx_init(&bo->lock, mtx_plain);
1483 pipe_reference_init(&bo->base.reference, 1);
1484 bo->base.alignment = info.phys_alignment;
1485 bo->bo = result.buf_handle;
1486 bo->base.size = result.alloc_size;
1487 bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
1488 bo->ws = ws;
1489 bo->va = va;
1490 bo->u.real.va_handle = va_handle;
1491 bo->initial_domain = initial;
1492 bo->flags = flags;
1493 bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
1494 bo->is_shared = true;
1495
1496 if (bo->initial_domain & RADEON_DOMAIN_VRAM)
1497 ws->allocated_vram += align64(bo->base.size, ws->info.gart_page_size);
1498 else if (bo->initial_domain & RADEON_DOMAIN_GTT)
1499 ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size);
1500
1501 amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms, &bo->u.real.kms_handle);
1502
1503 amdgpu_add_buffer_to_global_list(bo);
1504
1505 _mesa_hash_table_insert(ws->bo_export_table, bo->bo, bo);
1506 simple_mtx_unlock(&ws->bo_export_table_lock);
1507
1508 return &bo->base;
1509
1510 error:
1511 simple_mtx_unlock(&ws->bo_export_table_lock);
1512 if (bo)
1513 FREE(bo);
1514 if (va_handle)
1515 amdgpu_va_range_free(va_handle);
1516 amdgpu_bo_free(result.buf_handle);
1517 return NULL;
1518 }
1519
1520 static bool amdgpu_bo_get_handle(struct radeon_winsys *rws,
1521 struct pb_buffer *buffer,
1522 struct winsys_handle *whandle)
1523 {
1524 struct amdgpu_screen_winsys *sws = amdgpu_screen_winsys(rws);
1525 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buffer);
1526 struct amdgpu_winsys *ws = bo->ws;
1527 enum amdgpu_bo_handle_type type;
1528 struct hash_entry *entry;
1529 int r;
1530
1531 /* Don't allow exports of slab entries and sparse buffers. */
1532 if (!bo->bo)
1533 return false;
1534
1535 bo->u.real.use_reusable_pool = false;
1536
1537 switch (whandle->type) {
1538 case WINSYS_HANDLE_TYPE_SHARED:
1539 type = amdgpu_bo_handle_type_gem_flink_name;
1540 break;
1541 case WINSYS_HANDLE_TYPE_KMS:
1542 if (sws->fd == ws->fd) {
1543 whandle->handle = bo->u.real.kms_handle;
1544
1545 if (bo->is_shared)
1546 return true;
1547
1548 goto hash_table_set;
1549 }
1550
1551 simple_mtx_lock(&ws->sws_list_lock);
1552 entry = _mesa_hash_table_search(sws->kms_handles, bo);
1553 simple_mtx_unlock(&ws->sws_list_lock);
1554 if (entry) {
1555 whandle->handle = (uintptr_t)entry->data;
1556 return true;
1557 }
1558 /* Fall through */
1559 case WINSYS_HANDLE_TYPE_FD:
1560 type = amdgpu_bo_handle_type_dma_buf_fd;
1561 break;
1562 default:
1563 return false;
1564 }
1565
1566 r = amdgpu_bo_export(bo->bo, type, &whandle->handle);
1567 if (r)
1568 return false;
1569
1570 if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {
1571 int dma_fd = whandle->handle;
1572
1573 r = drmPrimeFDToHandle(sws->fd, dma_fd, &whandle->handle);
1574 close(dma_fd);
1575
1576 if (r)
1577 return false;
1578
1579 simple_mtx_lock(&ws->sws_list_lock);
1580 _mesa_hash_table_insert_pre_hashed(sws->kms_handles,
1581 bo->u.real.kms_handle, bo,
1582 (void*)(uintptr_t)whandle->handle);
1583 simple_mtx_unlock(&ws->sws_list_lock);
1584 }
1585
1586 hash_table_set:
1587 simple_mtx_lock(&ws->bo_export_table_lock);
1588 _mesa_hash_table_insert(ws->bo_export_table, bo->bo, bo);
1589 simple_mtx_unlock(&ws->bo_export_table_lock);
1590
1591 bo->is_shared = true;
1592 return true;
1593 }
1594
1595 static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
1596 void *pointer, uint64_t size)
1597 {
1598 struct amdgpu_winsys *ws = amdgpu_winsys(rws);
1599 amdgpu_bo_handle buf_handle;
1600 struct amdgpu_winsys_bo *bo;
1601 uint64_t va;
1602 amdgpu_va_handle va_handle;
1603 /* Avoid failure when the size is not page aligned */
1604 uint64_t aligned_size = align64(size, ws->info.gart_page_size);
1605
1606 bo = CALLOC_STRUCT(amdgpu_winsys_bo);
1607 if (!bo)
1608 return NULL;
1609
1610 if (amdgpu_create_bo_from_user_mem(ws->dev, pointer,
1611 aligned_size, &buf_handle))
1612 goto error;
1613
1614 if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
1615 aligned_size,
1616 amdgpu_get_optimal_alignment(ws, aligned_size,
1617 ws->info.gart_page_size),
1618 0, &va, &va_handle, AMDGPU_VA_RANGE_HIGH))
1619 goto error_va_alloc;
1620
1621 if (amdgpu_bo_va_op(buf_handle, 0, aligned_size, va, 0, AMDGPU_VA_OP_MAP))
1622 goto error_va_map;
1623
1624 /* Initialize it. */
1625 bo->is_user_ptr = true;
1626 pipe_reference_init(&bo->base.reference, 1);
1627 simple_mtx_init(&bo->lock, mtx_plain);
1628 bo->bo = buf_handle;
1629 bo->base.alignment = 0;
1630 bo->base.size = size;
1631 bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
1632 bo->ws = ws;
1633 bo->cpu_ptr = pointer;
1634 bo->va = va;
1635 bo->u.real.va_handle = va_handle;
1636 bo->initial_domain = RADEON_DOMAIN_GTT;
1637 bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
1638
1639 ws->allocated_gtt += aligned_size;
1640
1641 amdgpu_add_buffer_to_global_list(bo);
1642
1643 amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms, &bo->u.real.kms_handle);
1644
1645 return (struct pb_buffer*)bo;
1646
1647 error_va_map:
1648 amdgpu_va_range_free(va_handle);
1649
1650 error_va_alloc:
1651 amdgpu_bo_free(buf_handle);
1652
1653 error:
1654 FREE(bo);
1655 return NULL;
1656 }
1657
1658 static bool amdgpu_bo_is_user_ptr(struct pb_buffer *buf)
1659 {
1660 return ((struct amdgpu_winsys_bo*)buf)->is_user_ptr;
1661 }
1662
1663 static bool amdgpu_bo_is_suballocated(struct pb_buffer *buf)
1664 {
1665 struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
1666
1667 return !bo->bo && !bo->sparse;
1668 }
1669
1670 static uint64_t amdgpu_bo_get_va(struct pb_buffer *buf)
1671 {
1672 return ((struct amdgpu_winsys_bo*)buf)->va;
1673 }
1674
1675 void amdgpu_bo_init_functions(struct amdgpu_screen_winsys *ws)
1676 {
1677 ws->base.buffer_set_metadata = amdgpu_buffer_set_metadata;
1678 ws->base.buffer_get_metadata = amdgpu_buffer_get_metadata;
1679 ws->base.buffer_map = amdgpu_bo_map;
1680 ws->base.buffer_unmap = amdgpu_bo_unmap;
1681 ws->base.buffer_wait = amdgpu_bo_wait;
1682 ws->base.buffer_create = amdgpu_buffer_create;
1683 ws->base.buffer_from_handle = amdgpu_bo_from_handle;
1684 ws->base.buffer_from_ptr = amdgpu_bo_from_ptr;
1685 ws->base.buffer_is_user_ptr = amdgpu_bo_is_user_ptr;
1686 ws->base.buffer_is_suballocated = amdgpu_bo_is_suballocated;
1687 ws->base.buffer_get_handle = amdgpu_bo_get_handle;
1688 ws->base.buffer_commit = amdgpu_bo_sparse_commit;
1689 ws->base.buffer_get_virtual_address = amdgpu_bo_get_va;
1690 ws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain;
1691 ws->base.buffer_get_flags = amdgpu_bo_get_flags;
1692 }
1693