/*
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */

#include "amdgpu_cs.h"

#include "util/hash_table.h"
#include "util/os_time.h"
#include "util/u_hash_table.h"
#include "frontend/drm_driver.h"
#include "drm-uapi/amdgpu_drm.h"
#include <xf86drm.h>
#include <stdio.h>
#include <inttypes.h>

#ifndef AMDGPU_VA_RANGE_HIGH
#define AMDGPU_VA_RANGE_HIGH	0x2
#endif

/* Set to 1 for verbose output showing committed sparse buffer ranges. */
#define DEBUG_SPARSE_COMMITS 0

struct amdgpu_sparse_backing_chunk {
   uint32_t begin, end;
};

static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
                           enum radeon_bo_usage usage)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_winsys *ws = bo->ws;
   int64_t abs_timeout;

   if (timeout == 0) {
      if (p_atomic_read(&bo->num_active_ioctls))
         return false;

   } else {
      abs_timeout = os_time_get_absolute_timeout(timeout);

      /* Wait if any ioctl is being submitted with this buffer. */
      if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
         return false;
   }

   if (bo->is_shared) {
      /* We can't use user fences for shared buffers, because user fences
       * are local to this process only. If we want to wait for all buffer
       * uses in all processes, we have to use amdgpu_bo_wait_for_idle.
       */
      bool buffer_busy = true;
      int r;

      r = amdgpu_bo_wait_for_idle(bo->bo, timeout, &buffer_busy);
      if (r)
         fprintf(stderr, "%s: amdgpu_bo_wait_for_idle failed %i\n", __func__,
                 r);
      return !buffer_busy;
   }

   if (timeout == 0) {
      unsigned idle_fences;
      bool buffer_idle;

      simple_mtx_lock(&ws->bo_fence_lock);

      for (idle_fences = 0; idle_fences < bo->num_fences; ++idle_fences) {
         if (!amdgpu_fence_wait(bo->fences[idle_fences], 0, false))
            break;
      }

      /* Release the idle fences to avoid checking them again later. */
      for (unsigned i = 0; i < idle_fences; ++i)
         amdgpu_fence_reference(&bo->fences[i], NULL);

      memmove(&bo->fences[0], &bo->fences[idle_fences],
              (bo->num_fences - idle_fences) * sizeof(*bo->fences));
      bo->num_fences -= idle_fences;

      buffer_idle = !bo->num_fences;
      simple_mtx_unlock(&ws->bo_fence_lock);

      return buffer_idle;
   } else {
      bool buffer_idle = true;

      simple_mtx_lock(&ws->bo_fence_lock);
      while (bo->num_fences && buffer_idle) {
         struct pipe_fence_handle *fence = NULL;
         bool fence_idle = false;

         amdgpu_fence_reference(&fence, bo->fences[0]);

         /* Wait for the fence. */
         simple_mtx_unlock(&ws->bo_fence_lock);
         if (amdgpu_fence_wait(fence, abs_timeout, true))
            fence_idle = true;
         else
            buffer_idle = false;
         simple_mtx_lock(&ws->bo_fence_lock);

         /* Release an idle fence to avoid checking it again later, keeping in
          * mind that the fence array may have been modified by other threads.
          */
         if (fence_idle && bo->num_fences && bo->fences[0] == fence) {
            amdgpu_fence_reference(&bo->fences[0], NULL);
            memmove(&bo->fences[0], &bo->fences[1],
                    (bo->num_fences - 1) * sizeof(*bo->fences));
            bo->num_fences--;
         }

         amdgpu_fence_reference(&fence, NULL);
      }
      simple_mtx_unlock(&ws->bo_fence_lock);

      return buffer_idle;
   }
}
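
/* Usage note (a sketch, not taken from a specific caller): a zero timeout
 * turns this into a non-blocking "is the buffer idle right now?" query, e.g.
 *
 *    if (amdgpu_bo_wait(buf, 0, RADEON_USAGE_READWRITE))
 *       ... reuse or reclaim the buffer immediately ...
 *
 * while PIPE_TIMEOUT_INFINITE blocks until all tracked fences signal, which
 * is what amdgpu_bo_map() relies on for synchronized CPU access.
 */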

static enum radeon_bo_domain amdgpu_bo_get_initial_domain(
      struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->initial_domain;
}

static enum radeon_bo_flag amdgpu_bo_get_flags(
      struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->flags;
}

static void amdgpu_bo_remove_fences(struct amdgpu_winsys_bo *bo)
{
   for (unsigned i = 0; i < bo->num_fences; ++i)
      amdgpu_fence_reference(&bo->fences[i], NULL);

   FREE(bo->fences);
   bo->num_fences = 0;
   bo->max_fences = 0;
}

void amdgpu_bo_destroy(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_screen_winsys *sws_iter;
   struct amdgpu_winsys *ws = bo->ws;

   assert(bo->bo && "must not be called for slab entries");

   if (!bo->is_user_ptr && bo->cpu_ptr) {
      bo->cpu_ptr = NULL;
      amdgpu_bo_unmap(&bo->base);
   }
   assert(bo->is_user_ptr || bo->u.real.map_count == 0);

   if (ws->debug_all_bos) {
      simple_mtx_lock(&ws->global_bo_list_lock);
      list_del(&bo->u.real.global_list_item);
      ws->num_buffers--;
      simple_mtx_unlock(&ws->global_bo_list_lock);
   }

   /* Close all KMS handles retrieved for other DRM file descriptions */
   simple_mtx_lock(&ws->sws_list_lock);
   for (sws_iter = ws->sws_list; sws_iter; sws_iter = sws_iter->next) {
      struct hash_entry *entry;

      if (!sws_iter->kms_handles)
         continue;

      entry = _mesa_hash_table_search(sws_iter->kms_handles, bo);
      if (entry) {
         struct drm_gem_close args = { .handle = (uintptr_t)entry->data };

         drmIoctl(sws_iter->fd, DRM_IOCTL_GEM_CLOSE, &args);
         _mesa_hash_table_remove(sws_iter->kms_handles, entry);
      }
   }
   simple_mtx_unlock(&ws->sws_list_lock);

   simple_mtx_lock(&ws->bo_export_table_lock);
   _mesa_hash_table_remove_key(ws->bo_export_table, bo->bo);
   simple_mtx_unlock(&ws->bo_export_table_lock);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM_GTT) {
      amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
      amdgpu_va_range_free(bo->u.real.va_handle);
   }
   amdgpu_bo_free(bo->bo);

   amdgpu_bo_remove_fences(bo);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram -= align64(bo->base.size, ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt -= align64(bo->base.size, ws->info.gart_page_size);

   simple_mtx_destroy(&bo->lock);
   FREE(bo);
}

static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   assert(bo->bo); /* slab buffers have a separate vtbl */

   if (bo->u.real.use_reusable_pool)
      pb_cache_add_buffer(&bo->u.real.cache_entry);
   else
      amdgpu_bo_destroy(_buf);
}

static void amdgpu_clean_up_buffer_managers(struct amdgpu_winsys *ws)
{
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      pb_slabs_reclaim(&ws->bo_slabs[i]);
      if (ws->info.has_tmz_support)
         pb_slabs_reclaim(&ws->bo_slabs_encrypted[i]);
   }

   pb_cache_release_all_buffers(&ws->bo_cache);
}

static bool amdgpu_bo_do_map(struct amdgpu_winsys_bo *bo, void **cpu)
{
   assert(!bo->sparse && bo->bo && !bo->is_user_ptr);
   int r = amdgpu_bo_cpu_map(bo->bo, cpu);
   if (r) {
      /* Clean up buffer managers and try again. */
      amdgpu_clean_up_buffer_managers(bo->ws);
      r = amdgpu_bo_cpu_map(bo->bo, cpu);
      if (r)
         return false;
   }

   if (p_atomic_inc_return(&bo->u.real.map_count) == 1) {
      if (bo->initial_domain & RADEON_DOMAIN_VRAM)
         bo->ws->mapped_vram += bo->base.size;
      else if (bo->initial_domain & RADEON_DOMAIN_GTT)
         bo->ws->mapped_gtt += bo->base.size;
      bo->ws->num_mapped_buffers++;
   }

   return true;
}

void *amdgpu_bo_map(struct pb_buffer *buf,
                    struct radeon_cmdbuf *rcs,
                    enum pipe_map_flags usage)
{
   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
   struct amdgpu_winsys_bo *real;
   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs;

   assert(!bo->sparse);

   /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
   if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
      /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
      if (usage & PIPE_MAP_DONTBLOCK) {
         if (!(usage & PIPE_MAP_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
                                                               RADEON_USAGE_WRITE)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
               return NULL;
            }

            if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
                                RADEON_USAGE_WRITE)) {
               return NULL;
            }
         } else {
            if (cs && amdgpu_bo_is_referenced_by_cs(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
               return NULL;
            }

            if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
                                RADEON_USAGE_READWRITE)) {
               return NULL;
            }
         }
      } else {
         uint64_t time = os_time_get_nano();

         if (!(usage & PIPE_MAP_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs) {
               if (amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
                                                            RADEON_USAGE_WRITE)) {
                  cs->flush_cs(cs->flush_data,
                               RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
               } else {
                  /* Try to avoid busy-waiting in amdgpu_bo_wait. */
                  if (p_atomic_read(&bo->num_active_ioctls))
                     amdgpu_cs_sync_flush(rcs);
               }
            }

            amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_WRITE);
         } else {
            /* Mapping for write. */
            if (cs) {
               if (amdgpu_bo_is_referenced_by_cs(cs, bo)) {
                  cs->flush_cs(cs->flush_data,
                               RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
               } else {
                  /* Try to avoid busy-waiting in amdgpu_bo_wait. */
                  if (p_atomic_read(&bo->num_active_ioctls))
                     amdgpu_cs_sync_flush(rcs);
               }
            }

            amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_READWRITE);
         }

         bo->ws->buffer_wait_time += os_time_get_nano() - time;
      }
   }

   /* Buffer synchronization has been checked, now actually map the buffer. */
   void *cpu = NULL;
   uint64_t offset = 0;

   if (bo->bo) {
      real = bo;
   } else {
      real = bo->u.slab.real;
      offset = bo->va - real->va;
   }

   if (usage & RADEON_MAP_TEMPORARY) {
      if (real->is_user_ptr) {
         cpu = real->cpu_ptr;
      } else {
         if (!amdgpu_bo_do_map(real, &cpu))
            return NULL;
      }
   } else {
      cpu = p_atomic_read(&real->cpu_ptr);
      if (!cpu) {
         simple_mtx_lock(&real->lock);
         /* Must re-check due to the possibility of a race. Re-check need not
          * be atomic thanks to the lock. */
         cpu = real->cpu_ptr;
         if (!cpu) {
            if (!amdgpu_bo_do_map(real, &cpu)) {
               simple_mtx_unlock(&real->lock);
               return NULL;
            }
            p_atomic_set(&real->cpu_ptr, cpu);
         }
         simple_mtx_unlock(&real->lock);
      }
   }

   return (uint8_t*)cpu + offset;
}

void amdgpu_bo_unmap(struct pb_buffer *buf)
{
   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
   struct amdgpu_winsys_bo *real;

   assert(!bo->sparse);

   if (bo->is_user_ptr)
      return;

   real = bo->bo ? bo : bo->u.slab.real;
   assert(real->u.real.map_count != 0 && "too many unmaps");
   if (p_atomic_dec_zero(&real->u.real.map_count)) {
      assert(!real->cpu_ptr &&
             "too many unmaps or forgot RADEON_MAP_TEMPORARY flag");

      if (real->initial_domain & RADEON_DOMAIN_VRAM)
         real->ws->mapped_vram -= real->base.size;
      else if (real->initial_domain & RADEON_DOMAIN_GTT)
         real->ws->mapped_gtt -= real->base.size;
      real->ws->num_mapped_buffers--;
   }

   amdgpu_bo_cpu_unmap(real->bo);
}
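
/* Usage sketch (illustrative, not from a real caller): a default map is
 * persistent and stays cached in real->cpu_ptr until the buffer is destroyed,
 * whereas RADEON_MAP_TEMPORARY pairs each map with an explicit unmap:
 *
 *    void *ptr = amdgpu_bo_map(buf, NULL, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
 *    if (ptr) {
 *       memcpy(ptr, data, size);   // hypothetical data/size
 *       amdgpu_bo_unmap(buf);
 *    }
 */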

static const struct pb_vtbl amdgpu_winsys_bo_vtbl = {
   amdgpu_bo_destroy_or_cache
   /* other functions are never called */
};

static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo)
{
   struct amdgpu_winsys *ws = bo->ws;

   assert(bo->bo);

   if (ws->debug_all_bos) {
      simple_mtx_lock(&ws->global_bo_list_lock);
      list_addtail(&bo->u.real.global_list_item, &ws->global_bo_list);
      ws->num_buffers++;
      simple_mtx_unlock(&ws->global_bo_list_lock);
   }
}

static unsigned amdgpu_get_optimal_alignment(struct amdgpu_winsys *ws,
                                             uint64_t size, unsigned alignment)
{
   /* Increase the alignment for faster address translation and better memory
    * access pattern.
    */
   if (size >= ws->info.pte_fragment_size) {
      alignment = MAX2(alignment, ws->info.pte_fragment_size);
   } else if (size) {
      unsigned msb = util_last_bit(size);

      alignment = MAX2(alignment, 1u << (msb - 1));
   }
   return alignment;
}
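
/* Worked example (assuming a 64 KiB PTE fragment size, purely illustrative):
 * a 24 KiB request gets its alignment raised to
 * 1 << (util_last_bit(24576) - 1) = 16 KiB, i.e. the largest power of two not
 * exceeding the size, while any request of 64 KiB or more is aligned to the
 * full fragment size so the kernel can use larger PTE fragments.
 */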

static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
                                                 uint64_t size,
                                                 unsigned alignment,
                                                 enum radeon_bo_domain initial_domain,
                                                 unsigned flags,
                                                 int heap)
{
   struct amdgpu_bo_alloc_request request = {0};
   amdgpu_bo_handle buf_handle;
   uint64_t va = 0;
   struct amdgpu_winsys_bo *bo;
   amdgpu_va_handle va_handle = NULL;
   int r;

   /* VRAM or GTT must be specified, but not both at the same time. */
   assert(util_bitcount(initial_domain & (RADEON_DOMAIN_VRAM_GTT |
                                          RADEON_DOMAIN_GDS |
                                          RADEON_DOMAIN_OA)) == 1);

   alignment = amdgpu_get_optimal_alignment(ws, size, alignment);

   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo) {
      return NULL;
   }

   if (heap >= 0) {
      pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, &bo->base,
                          heap);
   }
   request.alloc_size = size;
   request.phys_alignment = alignment;

   if (initial_domain & RADEON_DOMAIN_VRAM) {
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;

      /* Since VRAM and GTT have almost the same performance on APUs, we could
       * just set GTT. However, in order to decrease GTT(RAM) usage, which is
       * shared with the OS, allow VRAM placements too. The idea is not to use
       * VRAM usefully, but to use it so that it's not unused and wasted.
       */
      if (!ws->info.has_dedicated_vram)
         request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
   }

   if (initial_domain & RADEON_DOMAIN_GTT)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
   if (initial_domain & RADEON_DOMAIN_GDS)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
   if (initial_domain & RADEON_DOMAIN_OA)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;

   if (flags & RADEON_FLAG_NO_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
   if (flags & RADEON_FLAG_GTT_WC)
      request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
   if (ws->zero_all_vram_allocs &&
       (request.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM))
      request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
   if ((flags & RADEON_FLAG_ENCRYPTED) &&
       ws->info.has_tmz_support) {
      request.flags |= AMDGPU_GEM_CREATE_ENCRYPTED;

      if (!(flags & RADEON_FLAG_DRIVER_INTERNAL)) {
         struct amdgpu_screen_winsys *sws_iter;
         simple_mtx_lock(&ws->sws_list_lock);
         for (sws_iter = ws->sws_list; sws_iter; sws_iter = sws_iter->next) {
            *((bool*) &sws_iter->base.uses_secure_bos) = true;
         }
         simple_mtx_unlock(&ws->sws_list_lock);
      }
   }

   r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
   if (r) {
      fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
      fprintf(stderr, "amdgpu:    size      : %"PRIu64" bytes\n", size);
      fprintf(stderr, "amdgpu:    alignment : %u bytes\n", alignment);
      fprintf(stderr, "amdgpu:    domains   : %u\n", initial_domain);
      fprintf(stderr, "amdgpu:    flags   : %" PRIx64 "\n", request.flags);
      goto error_bo_alloc;
   }

   if (initial_domain & RADEON_DOMAIN_VRAM_GTT) {
      unsigned va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;

      r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                                size + va_gap_size, alignment,
                                0, &va, &va_handle,
                                (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
                                AMDGPU_VA_RANGE_HIGH);
      if (r)
         goto error_va_alloc;

      unsigned vm_flags = AMDGPU_VM_PAGE_READABLE |
                          AMDGPU_VM_PAGE_EXECUTABLE;

      if (!(flags & RADEON_FLAG_READ_ONLY))
         vm_flags |= AMDGPU_VM_PAGE_WRITEABLE;

      if (flags & RADEON_FLAG_UNCACHED)
         vm_flags |= AMDGPU_VM_MTYPE_UC;

      r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags,
                              AMDGPU_VA_OP_MAP);
      if (r)
         goto error_va_map;
   }

   simple_mtx_init(&bo->lock, mtx_plain);
   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment = alignment;
   bo->base.usage = 0;
   bo->base.size = size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->bo = buf_handle;
   bo->va = va;
   bo->u.real.va_handle = va_handle;
   bo->initial_domain = initial_domain;
   bo->flags = flags;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);

   if (initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align64(size, ws->info.gart_page_size);
   else if (initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align64(size, ws->info.gart_page_size);

   amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms, &bo->u.real.kms_handle);

   amdgpu_add_buffer_to_global_list(bo);

   return bo;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error_bo_alloc:
   FREE(bo);
   return NULL;
}
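
/* Illustrative call (a sketch; real callers go through amdgpu_bo_create(),
 * which picks the cache heap index): a 1 MiB VRAM-only allocation that must
 * never be CPU-mapped might be created roughly like
 *
 *    amdgpu_create_bo(ws, 1024 * 1024, 4096,
 *                     RADEON_DOMAIN_VRAM,
 *                     RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS,
 *                     -1);
 *
 * where heap == -1 means the buffer is not tracked by the reusable cache.
 */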

bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   if (amdgpu_bo_is_referenced_by_any_cs(bo)) {
      return false;
   }

   return amdgpu_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
}

bool amdgpu_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
{
   struct amdgpu_winsys_bo *bo = NULL; /* fix container_of */
   bo = container_of(entry, bo, u.slab.entry);

   return amdgpu_bo_can_reclaim(&bo->base);
}

static struct pb_slabs *get_slabs(struct amdgpu_winsys *ws, uint64_t size,
                                  enum radeon_bo_flag flags)
{
   struct pb_slabs *bo_slabs = ((flags & RADEON_FLAG_ENCRYPTED) && ws->info.has_tmz_support) ?
      ws->bo_slabs_encrypted : ws->bo_slabs;
   /* Find the correct slab allocator for the given size. */
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      struct pb_slabs *slabs = &bo_slabs[i];

      if (size <= 1 << (slabs->min_order + slabs->num_orders - 1))
         return slabs;
   }

   assert(0);
   return NULL;
}
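
/* Example of the size-class check above, with made-up orders: an allocator
 * configured with min_order = 8 and num_orders = 5 covers entries up to
 * 1 << (8 + 5 - 1) = 4096 bytes, so a 3000-byte request is served by it,
 * while a 5000-byte request falls through to the next (larger) allocator.
 */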

static void amdgpu_bo_slab_destroy(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   assert(!bo->bo);

   if (bo->flags & RADEON_FLAG_ENCRYPTED)
      pb_slab_free(get_slabs(bo->ws,
                             bo->base.size,
                             RADEON_FLAG_ENCRYPTED), &bo->u.slab.entry);
   else
      pb_slab_free(get_slabs(bo->ws,
                             bo->base.size,
                             0), &bo->u.slab.entry);
}

static const struct pb_vtbl amdgpu_winsys_bo_slab_vtbl = {
   amdgpu_bo_slab_destroy
   /* other functions are never called */
};

static struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap,
                                            unsigned entry_size,
                                            unsigned group_index,
                                            bool encrypted)
{
   struct amdgpu_winsys *ws = priv;
   struct amdgpu_slab *slab = CALLOC_STRUCT(amdgpu_slab);
   enum radeon_bo_domain domains = radeon_domain_from_heap(heap);
   enum radeon_bo_flag flags = radeon_flags_from_heap(heap);
   uint32_t base_id;
   unsigned slab_size = 0;

   if (!slab)
      return NULL;

   if (encrypted)
      flags |= RADEON_FLAG_ENCRYPTED;

   struct pb_slabs *slabs = ((flags & RADEON_FLAG_ENCRYPTED) && ws->info.has_tmz_support) ?
      ws->bo_slabs_encrypted : ws->bo_slabs;

   /* Determine the slab buffer size. */
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      unsigned max_entry_size = 1 << (slabs[i].min_order + slabs[i].num_orders - 1);

      if (entry_size <= max_entry_size) {
         /* The slab size is twice the size of the largest possible entry. */
         slab_size = max_entry_size * 2;

         /* The largest slab should have the same size as the PTE fragment
          * size to get faster address translation.
          */
         if (i == NUM_SLAB_ALLOCATORS - 1 &&
             slab_size < ws->info.pte_fragment_size)
            slab_size = ws->info.pte_fragment_size;
         break;
      }
   }
   assert(slab_size != 0);

   slab->buffer = amdgpu_winsys_bo(amdgpu_bo_create(ws,
                                                    slab_size, slab_size,
                                                    domains, flags));
   if (!slab->buffer)
      goto fail;

   slab->base.num_entries = slab->buffer->base.size / entry_size;
   slab->base.num_free = slab->base.num_entries;
   slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
   if (!slab->entries)
      goto fail_buffer;

   list_inithead(&slab->base.free);

   base_id = __sync_fetch_and_add(&ws->next_bo_unique_id, slab->base.num_entries);

   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      struct amdgpu_winsys_bo *bo = &slab->entries[i];

      simple_mtx_init(&bo->lock, mtx_plain);
      bo->base.alignment = entry_size;
      bo->base.usage = slab->buffer->base.usage;
      bo->base.size = entry_size;
      bo->base.vtbl = &amdgpu_winsys_bo_slab_vtbl;
      bo->ws = ws;
      bo->va = slab->buffer->va + i * entry_size;
      bo->initial_domain = domains;
      bo->unique_id = base_id + i;
      bo->u.slab.entry.slab = &slab->base;
      bo->u.slab.entry.group_index = group_index;

      if (slab->buffer->bo) {
         /* The slab is not suballocated. */
         bo->u.slab.real = slab->buffer;
      } else {
         /* The slab is allocated out of a bigger slab. */
         bo->u.slab.real = slab->buffer->u.slab.real;
         assert(bo->u.slab.real->bo);
      }

      list_addtail(&bo->u.slab.entry.head, &slab->base.free);
   }

   return &slab->base;

fail_buffer:
   amdgpu_winsys_bo_reference(&slab->buffer, NULL);
fail:
   FREE(slab);
   return NULL;
}

struct pb_slab *amdgpu_bo_slab_alloc_encrypted(void *priv, unsigned heap,
                                               unsigned entry_size,
                                               unsigned group_index)
{
   return amdgpu_bo_slab_alloc(priv, heap, entry_size, group_index, true);
}

struct pb_slab *amdgpu_bo_slab_alloc_normal(void *priv, unsigned heap,
                                            unsigned entry_size,
                                            unsigned group_index)
{
   return amdgpu_bo_slab_alloc(priv, heap, entry_size, group_index, false);
}

void amdgpu_bo_slab_free(void *priv, struct pb_slab *pslab)
{
   struct amdgpu_slab *slab = amdgpu_slab(pslab);

   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      amdgpu_bo_remove_fences(&slab->entries[i]);
      simple_mtx_destroy(&slab->entries[i].lock);
   }

   FREE(slab->entries);
   amdgpu_winsys_bo_reference(&slab->buffer, NULL);
   FREE(slab);
}

#if DEBUG_SPARSE_COMMITS
static void
sparse_dump(struct amdgpu_winsys_bo *bo, const char *func)
{
   fprintf(stderr, "%s: %p (size=%"PRIu64", num_va_pages=%u) @ %s\n"
                   "Commitments:\n",
           __func__, bo, bo->base.size, bo->u.sparse.num_va_pages, func);

   struct amdgpu_sparse_backing *span_backing = NULL;
   uint32_t span_first_backing_page = 0;
   uint32_t span_first_va_page = 0;
   uint32_t va_page = 0;

   for (;;) {
      struct amdgpu_sparse_backing *backing = 0;
      uint32_t backing_page = 0;

      if (va_page < bo->u.sparse.num_va_pages) {
         backing = bo->u.sparse.commitments[va_page].backing;
         backing_page = bo->u.sparse.commitments[va_page].page;
      }

      if (span_backing &&
          (backing != span_backing ||
           backing_page != span_first_backing_page + (va_page - span_first_va_page))) {
         fprintf(stderr, " %u..%u: backing=%p:%u..%u\n",
                 span_first_va_page, va_page - 1, span_backing,
                 span_first_backing_page,
                 span_first_backing_page + (va_page - span_first_va_page) - 1);

         span_backing = NULL;
      }

      if (va_page >= bo->u.sparse.num_va_pages)
         break;

      if (backing && !span_backing) {
         span_backing = backing;
         span_first_backing_page = backing_page;
         span_first_va_page = va_page;
      }

      va_page++;
   }

   fprintf(stderr, "Backing:\n");

   list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) {
      fprintf(stderr, " %p (size=%"PRIu64")\n", backing, backing->bo->base.size);
      for (unsigned i = 0; i < backing->num_chunks; ++i)
         fprintf(stderr, "   %u..%u\n", backing->chunks[i].begin, backing->chunks[i].end);
   }
}
#endif

/*
 * Attempt to allocate the given number of backing pages. Fewer pages may be
 * allocated (depending on the fragmentation of existing backing buffers),
 * which will be reflected by a change to *pnum_pages.
 */
static struct amdgpu_sparse_backing *
sparse_backing_alloc(struct amdgpu_winsys_bo *bo, uint32_t *pstart_page, uint32_t *pnum_pages)
{
   struct amdgpu_sparse_backing *best_backing;
   unsigned best_idx;
   uint32_t best_num_pages;

   best_backing = NULL;
   best_idx = 0;
   best_num_pages = 0;

   /* This is a very simple and inefficient best-fit algorithm. */
   list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) {
      for (unsigned idx = 0; idx < backing->num_chunks; ++idx) {
         uint32_t cur_num_pages = backing->chunks[idx].end - backing->chunks[idx].begin;
         if ((best_num_pages < *pnum_pages && cur_num_pages > best_num_pages) ||
            (best_num_pages > *pnum_pages && cur_num_pages < best_num_pages)) {
            best_backing = backing;
            best_idx = idx;
            best_num_pages = cur_num_pages;
         }
      }
   }

   /* Allocate a new backing buffer if necessary. */
   if (!best_backing) {
      struct pb_buffer *buf;
      uint64_t size;
      uint32_t pages;

      best_backing = CALLOC_STRUCT(amdgpu_sparse_backing);
      if (!best_backing)
         return NULL;

      best_backing->max_chunks = 4;
      best_backing->chunks = CALLOC(best_backing->max_chunks,
                                    sizeof(*best_backing->chunks));
      if (!best_backing->chunks) {
         FREE(best_backing);
         return NULL;
      }

      assert(bo->u.sparse.num_backing_pages < DIV_ROUND_UP(bo->base.size, RADEON_SPARSE_PAGE_SIZE));

      size = MIN3(bo->base.size / 16,
                  8 * 1024 * 1024,
                  bo->base.size - (uint64_t)bo->u.sparse.num_backing_pages * RADEON_SPARSE_PAGE_SIZE);
      size = MAX2(size, RADEON_SPARSE_PAGE_SIZE);

      buf = amdgpu_bo_create(bo->ws, size, RADEON_SPARSE_PAGE_SIZE,
                             bo->initial_domain,
                             bo->u.sparse.flags | RADEON_FLAG_NO_SUBALLOC);
      if (!buf) {
         FREE(best_backing->chunks);
         FREE(best_backing);
         return NULL;
      }

      /* We might have gotten a bigger buffer than requested via caching. */
      pages = buf->size / RADEON_SPARSE_PAGE_SIZE;

      best_backing->bo = amdgpu_winsys_bo(buf);
      best_backing->num_chunks = 1;
      best_backing->chunks[0].begin = 0;
      best_backing->chunks[0].end = pages;

      list_add(&best_backing->list, &bo->u.sparse.backing);
      bo->u.sparse.num_backing_pages += pages;

      best_idx = 0;
      best_num_pages = pages;
   }

   *pnum_pages = MIN2(*pnum_pages, best_num_pages);
   *pstart_page = best_backing->chunks[best_idx].begin;
   best_backing->chunks[best_idx].begin += *pnum_pages;

   if (best_backing->chunks[best_idx].begin >= best_backing->chunks[best_idx].end) {
      memmove(&best_backing->chunks[best_idx], &best_backing->chunks[best_idx + 1],
              sizeof(*best_backing->chunks) * (best_backing->num_chunks - best_idx - 1));
      best_backing->num_chunks--;
   }

   return best_backing;
}
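
/* Worked example of the chunk bookkeeping (numbers are illustrative): if a
 * backing buffer has one free chunk [0, 128) and 32 pages are requested, the
 * caller gets start_page = 0, num_pages = 32, and the chunk shrinks to
 * [32, 128). If the whole chunk had been consumed, it would be removed from
 * the array by the memmove above.
 */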

static void
sparse_free_backing_buffer(struct amdgpu_winsys_bo *bo,
                           struct amdgpu_sparse_backing *backing)
{
   struct amdgpu_winsys *ws = backing->bo->ws;

   bo->u.sparse.num_backing_pages -= backing->bo->base.size / RADEON_SPARSE_PAGE_SIZE;

   simple_mtx_lock(&ws->bo_fence_lock);
   amdgpu_add_fences(backing->bo, bo->num_fences, bo->fences);
   simple_mtx_unlock(&ws->bo_fence_lock);

   list_del(&backing->list);
   amdgpu_winsys_bo_reference(&backing->bo, NULL);
   FREE(backing->chunks);
   FREE(backing);
}

/*
 * Return a range of pages from the given backing buffer back into the
 * free structure.
 */
static bool
sparse_backing_free(struct amdgpu_winsys_bo *bo,
                    struct amdgpu_sparse_backing *backing,
                    uint32_t start_page, uint32_t num_pages)
{
   uint32_t end_page = start_page + num_pages;
   unsigned low = 0;
   unsigned high = backing->num_chunks;

   /* Find the first chunk with begin >= start_page. */
   while (low < high) {
      unsigned mid = low + (high - low) / 2;

      if (backing->chunks[mid].begin >= start_page)
         high = mid;
      else
         low = mid + 1;
   }

   assert(low >= backing->num_chunks || end_page <= backing->chunks[low].begin);
   assert(low == 0 || backing->chunks[low - 1].end <= start_page);

   if (low > 0 && backing->chunks[low - 1].end == start_page) {
      backing->chunks[low - 1].end = end_page;

      if (low < backing->num_chunks && end_page == backing->chunks[low].begin) {
         backing->chunks[low - 1].end = backing->chunks[low].end;
         memmove(&backing->chunks[low], &backing->chunks[low + 1],
                 sizeof(*backing->chunks) * (backing->num_chunks - low - 1));
         backing->num_chunks--;
      }
   } else if (low < backing->num_chunks && end_page == backing->chunks[low].begin) {
      backing->chunks[low].begin = start_page;
   } else {
      if (backing->num_chunks >= backing->max_chunks) {
         unsigned new_max_chunks = 2 * backing->max_chunks;
         struct amdgpu_sparse_backing_chunk *new_chunks =
            REALLOC(backing->chunks,
                    sizeof(*backing->chunks) * backing->max_chunks,
                    sizeof(*backing->chunks) * new_max_chunks);
         if (!new_chunks)
            return false;

         backing->max_chunks = new_max_chunks;
         backing->chunks = new_chunks;
      }

      memmove(&backing->chunks[low + 1], &backing->chunks[low],
              sizeof(*backing->chunks) * (backing->num_chunks - low));
      backing->chunks[low].begin = start_page;
      backing->chunks[low].end = end_page;
      backing->num_chunks++;
   }

   if (backing->num_chunks == 1 && backing->chunks[0].begin == 0 &&
       backing->chunks[0].end == backing->bo->base.size / RADEON_SPARSE_PAGE_SIZE)
      sparse_free_backing_buffer(bo, backing);

   return true;
}
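
/* Example of the three insertion cases above (page numbers are illustrative):
 * with free chunks [0, 10) and [20, 30), freeing pages [10, 15) extends the
 * left neighbour to [0, 15); freeing [15, 20) instead grows the right chunk
 * to [15, 30); and freeing [12, 15) inserts a new chunk between the two.
 * Freeing [10, 20) merges everything into a single [0, 30) chunk.
 */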

static void amdgpu_bo_sparse_destroy(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   int r;

   assert(!bo->bo && bo->sparse);

   r = amdgpu_bo_va_op_raw(bo->ws->dev, NULL, 0,
                           (uint64_t)bo->u.sparse.num_va_pages * RADEON_SPARSE_PAGE_SIZE,
                           bo->va, 0, AMDGPU_VA_OP_CLEAR);
   if (r) {
      fprintf(stderr, "amdgpu: clearing PRT VA region on destroy failed (%d)\n", r);
   }

   while (!list_is_empty(&bo->u.sparse.backing)) {
      struct amdgpu_sparse_backing *dummy = NULL;
      sparse_free_backing_buffer(bo,
                                 container_of(bo->u.sparse.backing.next,
                                              dummy, list));
   }

   amdgpu_va_range_free(bo->u.sparse.va_handle);
   FREE(bo->u.sparse.commitments);
   simple_mtx_destroy(&bo->lock);
   FREE(bo);
}

static const struct pb_vtbl amdgpu_winsys_bo_sparse_vtbl = {
   amdgpu_bo_sparse_destroy
   /* other functions are never called */
};

static struct pb_buffer *
amdgpu_bo_sparse_create(struct amdgpu_winsys *ws, uint64_t size,
                        enum radeon_bo_domain domain,
                        enum radeon_bo_flag flags)
{
   struct amdgpu_winsys_bo *bo;
   uint64_t map_size;
   uint64_t va_gap_size;
   int r;

   /* We use 32-bit page numbers; refuse to attempt allocating sparse buffers
    * that exceed this limit. This is not really a restriction: we don't have
    * that much virtual address space anyway.
    */
   if (size > (uint64_t)INT32_MAX * RADEON_SPARSE_PAGE_SIZE)
      return NULL;

   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo)
      return NULL;

   simple_mtx_init(&bo->lock, mtx_plain);
   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment = RADEON_SPARSE_PAGE_SIZE;
   bo->base.size = size;
   bo->base.vtbl = &amdgpu_winsys_bo_sparse_vtbl;
   bo->ws = ws;
   bo->initial_domain = domain;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
   bo->sparse = true;
   bo->u.sparse.flags = flags & ~RADEON_FLAG_SPARSE;

   bo->u.sparse.num_va_pages = DIV_ROUND_UP(size, RADEON_SPARSE_PAGE_SIZE);
   bo->u.sparse.commitments = CALLOC(bo->u.sparse.num_va_pages,
                                     sizeof(*bo->u.sparse.commitments));
   if (!bo->u.sparse.commitments)
      goto error_alloc_commitments;

   list_inithead(&bo->u.sparse.backing);

   /* For simplicity, we always map a multiple of the page size. */
   map_size = align64(size, RADEON_SPARSE_PAGE_SIZE);
   va_gap_size = ws->check_vm ? 4 * RADEON_SPARSE_PAGE_SIZE : 0;
   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             map_size + va_gap_size, RADEON_SPARSE_PAGE_SIZE,
                             0, &bo->va, &bo->u.sparse.va_handle,
                             AMDGPU_VA_RANGE_HIGH);
   if (r)
      goto error_va_alloc;

   r = amdgpu_bo_va_op_raw(bo->ws->dev, NULL, 0, size, bo->va,
                           AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_MAP);
   if (r)
      goto error_va_map;

   return &bo->base;

error_va_map:
   amdgpu_va_range_free(bo->u.sparse.va_handle);
error_va_alloc:
   FREE(bo->u.sparse.commitments);
error_alloc_commitments:
   simple_mtx_destroy(&bo->lock);
   FREE(bo);
   return NULL;
}

static bool
amdgpu_bo_sparse_commit(struct pb_buffer *buf, uint64_t offset, uint64_t size,
                        bool commit)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buf);
   struct amdgpu_sparse_commitment *comm;
   uint32_t va_page, end_va_page;
   bool ok = true;
   int r;

   assert(bo->sparse);
   assert(offset % RADEON_SPARSE_PAGE_SIZE == 0);
   assert(offset <= bo->base.size);
   assert(size <= bo->base.size - offset);
   assert(size % RADEON_SPARSE_PAGE_SIZE == 0 || offset + size == bo->base.size);

   comm = bo->u.sparse.commitments;
   va_page = offset / RADEON_SPARSE_PAGE_SIZE;
   end_va_page = va_page + DIV_ROUND_UP(size, RADEON_SPARSE_PAGE_SIZE);

   simple_mtx_lock(&bo->lock);

#if DEBUG_SPARSE_COMMITS
   sparse_dump(bo, __func__);
#endif

   if (commit) {
      while (va_page < end_va_page) {
         uint32_t span_va_page;

         /* Skip pages that are already committed. */
         if (comm[va_page].backing) {
            va_page++;
            continue;
         }

         /* Determine length of uncommitted span. */
         span_va_page = va_page;
         while (va_page < end_va_page && !comm[va_page].backing)
            va_page++;

         /* Fill the uncommitted span with chunks of backing memory. */
         while (span_va_page < va_page) {
            struct amdgpu_sparse_backing *backing;
            uint32_t backing_start, backing_size;

            backing_size = va_page - span_va_page;
            backing = sparse_backing_alloc(bo, &backing_start, &backing_size);
            if (!backing) {
               ok = false;
               goto out;
            }

            r = amdgpu_bo_va_op_raw(bo->ws->dev, backing->bo->bo,
                                    (uint64_t)backing_start * RADEON_SPARSE_PAGE_SIZE,
                                    (uint64_t)backing_size * RADEON_SPARSE_PAGE_SIZE,
                                    bo->va + (uint64_t)span_va_page * RADEON_SPARSE_PAGE_SIZE,
                                    AMDGPU_VM_PAGE_READABLE |
                                    AMDGPU_VM_PAGE_WRITEABLE |
                                    AMDGPU_VM_PAGE_EXECUTABLE,
                                    AMDGPU_VA_OP_REPLACE);
            if (r) {
               ok = sparse_backing_free(bo, backing, backing_start, backing_size);
               assert(ok && "sufficient memory should already be allocated");

               ok = false;
               goto out;
            }

            while (backing_size) {
               comm[span_va_page].backing = backing;
               comm[span_va_page].page = backing_start;
               span_va_page++;
               backing_start++;
               backing_size--;
            }
         }
      }
   } else {
      r = amdgpu_bo_va_op_raw(bo->ws->dev, NULL, 0,
                              (uint64_t)(end_va_page - va_page) * RADEON_SPARSE_PAGE_SIZE,
                              bo->va + (uint64_t)va_page * RADEON_SPARSE_PAGE_SIZE,
                              AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_REPLACE);
      if (r) {
         ok = false;
         goto out;
      }

      while (va_page < end_va_page) {
         struct amdgpu_sparse_backing *backing;
         uint32_t backing_start;
         uint32_t span_pages;

         /* Skip pages that are already uncommitted. */
         if (!comm[va_page].backing) {
            va_page++;
            continue;
         }

         /* Group contiguous spans of pages. */
         backing = comm[va_page].backing;
         backing_start = comm[va_page].page;
         comm[va_page].backing = NULL;

         span_pages = 1;
         va_page++;

         while (va_page < end_va_page &&
                comm[va_page].backing == backing &&
                comm[va_page].page == backing_start + span_pages) {
            comm[va_page].backing = NULL;
            va_page++;
            span_pages++;
         }

         if (!sparse_backing_free(bo, backing, backing_start, span_pages)) {
            /* Couldn't allocate tracking data structures, so we have to leak */
            fprintf(stderr, "amdgpu: leaking PRT backing memory\n");
            ok = false;
         }
      }
   }
out:

   simple_mtx_unlock(&bo->lock);

   return ok;
}
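
/* Usage sketch (illustrative; the driver normally reaches this through the
 * winsys buffer-commit entry point): committing physical memory behind the
 * first 1 MiB of a sparse buffer and later releasing it again looks roughly
 * like
 *
 *    amdgpu_bo_sparse_commit(buf, 0, 1024 * 1024, true);
 *    ...
 *    amdgpu_bo_sparse_commit(buf, 0, 1024 * 1024, false);
 *
 * Per the asserts above, offsets and sizes must be multiples of
 * RADEON_SPARSE_PAGE_SIZE, except for a final partial page at the end of the
 * buffer.
 */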

static void amdgpu_buffer_get_metadata(struct pb_buffer *_buf,
                                       struct radeon_bo_metadata *md,
                                       struct radeon_surf *surf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_bo_info info = {0};
   int r;

   assert(bo->bo && "must not be called for slab entries");

   r = amdgpu_bo_query_info(bo->bo, &info);
   if (r)
      return;

   ac_surface_set_bo_metadata(&bo->ws->info, surf, info.metadata.tiling_info,
                              &md->mode);

   md->size_metadata = info.metadata.size_metadata;
   memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

static void amdgpu_buffer_set_metadata(struct pb_buffer *_buf,
                                       struct radeon_bo_metadata *md,
                                       struct radeon_surf *surf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_bo_metadata metadata = {0};

   assert(bo->bo && "must not be called for slab entries");

   ac_surface_get_bo_metadata(&bo->ws->info, surf, &metadata.tiling_info);

   metadata.size_metadata = md->size_metadata;
   memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

   amdgpu_bo_set_metadata(bo->bo, &metadata);
}

struct pb_buffer *
amdgpu_bo_create(struct amdgpu_winsys *ws,
                 uint64_t size,
                 unsigned alignment,
                 enum radeon_bo_domain domain,
                 enum radeon_bo_flag flags)
{
   struct amdgpu_winsys_bo *bo;
   int heap = -1;

   if (domain & (RADEON_DOMAIN_GDS | RADEON_DOMAIN_OA))
      flags |= RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_SUBALLOC;

   /* VRAM implies WC. This is not optional. */
   assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC);

   /* NO_CPU_ACCESS is not valid with GTT. */
   assert(!(domain & RADEON_DOMAIN_GTT) || !(flags & RADEON_FLAG_NO_CPU_ACCESS));

   /* Sparse buffers must have NO_CPU_ACCESS set. */
   assert(!(flags & RADEON_FLAG_SPARSE) || flags & RADEON_FLAG_NO_CPU_ACCESS);

   struct pb_slabs *slabs = ((flags & RADEON_FLAG_ENCRYPTED) && ws->info.has_tmz_support) ?
      ws->bo_slabs_encrypted : ws->bo_slabs;
   struct pb_slabs *last_slab = &slabs[NUM_SLAB_ALLOCATORS - 1];
   unsigned max_slab_entry_size = 1 << (last_slab->min_order + last_slab->num_orders - 1);

   /* Sub-allocate small buffers from slabs. */
   if (!(flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE)) &&
       size <= max_slab_entry_size &&
       /* The alignment must be at most the size of the smallest slab entry or
        * the next power of two. */
       alignment <= MAX2(1 << slabs[0].min_order, util_next_power_of_two(size))) {
      struct pb_slab_entry *entry;
      int heap = radeon_get_heap_index(domain, flags);

      if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS)
         goto no_slab;

      struct pb_slabs *slabs = get_slabs(ws, size, flags);
      entry = pb_slab_alloc(slabs, size, heap);
      if (!entry) {
         /* Clean up buffer managers and try again. */
         amdgpu_clean_up_buffer_managers(ws);

         entry = pb_slab_alloc(slabs, size, heap);
      }
      if (!entry)
         return NULL;

      bo = NULL;
      bo = container_of(entry, bo, u.slab.entry);

      pipe_reference_init(&bo->base.reference, 1);

      return &bo->base;
   }
no_slab:

   if (flags & RADEON_FLAG_SPARSE) {
      assert(RADEON_SPARSE_PAGE_SIZE % alignment == 0);

      return amdgpu_bo_sparse_create(ws, size, domain, flags);
   }

   /* This flag is irrelevant for the cache. */
   flags &= ~RADEON_FLAG_NO_SUBALLOC;

   /* Align size to page size. This is the minimum alignment for normal
    * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
    * like constant/uniform buffers, can benefit from better and more reuse.
    */
   if (domain & RADEON_DOMAIN_VRAM_GTT) {
      size = align64(size, ws->info.gart_page_size);
      alignment = align(alignment, ws->info.gart_page_size);
   }

   bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;

   if (use_reusable_pool) {
       heap = radeon_get_heap_index(domain, flags & ~RADEON_FLAG_ENCRYPTED);
       assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);

       /* Get a buffer from the cache. */
       bo = (struct amdgpu_winsys_bo*)
            pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, 0, heap);
       if (bo)
          return &bo->base;
   }

   /* Create a new one. */
   bo = amdgpu_create_bo(ws, size, alignment, domain, flags, heap);
   if (!bo) {
      /* Clean up buffer managers and try again. */
      amdgpu_clean_up_buffer_managers(ws);

      bo = amdgpu_create_bo(ws, size, alignment, domain, flags, heap);
      if (!bo)
         return NULL;
   }

   bo->u.real.use_reusable_pool = use_reusable_pool;
   return &bo->base;
}
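
/* Summary of the allocation paths above (a descriptive note, not new logic):
 * small, suballocatable requests come from the slab allocators; sparse
 * requests get their own PRT-backed VA range; everything else is either
 * reclaimed from the reusable cache (when interprocess sharing is ruled out)
 * or allocated fresh via amdgpu_create_bo(), with one retry after reclaiming
 * the buffer managers.
 */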

static struct pb_buffer *
amdgpu_buffer_create(struct radeon_winsys *ws,
                     uint64_t size,
                     unsigned alignment,
                     enum radeon_bo_domain domain,
                     enum radeon_bo_flag flags)
{
   struct pb_buffer *res = amdgpu_bo_create(amdgpu_winsys(ws), size, alignment,
                                            domain, flags);
   return res;
}

static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
                                               struct winsys_handle *whandle,
                                               unsigned vm_alignment)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   struct amdgpu_winsys_bo *bo = NULL;
   enum amdgpu_bo_handle_type type;
   struct amdgpu_bo_import_result result = {0};
   uint64_t va;
   amdgpu_va_handle va_handle = NULL;
   struct amdgpu_bo_info info = {0};
   enum radeon_bo_domain initial = 0;
   enum radeon_bo_flag flags = 0;
   int r;

   switch (whandle->type) {
   case WINSYS_HANDLE_TYPE_SHARED:
      type = amdgpu_bo_handle_type_gem_flink_name;
      break;
   case WINSYS_HANDLE_TYPE_FD:
      type = amdgpu_bo_handle_type_dma_buf_fd;
      break;
   default:
      return NULL;
   }

   r = amdgpu_bo_import(ws->dev, type, whandle->handle, &result);
   if (r)
      return NULL;

   simple_mtx_lock(&ws->bo_export_table_lock);
   bo = util_hash_table_get(ws->bo_export_table, result.buf_handle);

   /* If the amdgpu_winsys_bo instance already exists, bump the reference
    * counter and return it.
    */
   if (bo) {
      p_atomic_inc(&bo->base.reference.count);
      simple_mtx_unlock(&ws->bo_export_table_lock);

      /* Release the buffer handle, because we don't need it anymore.
       * This function is returning an existing buffer, which has its own
       * handle.
       */
      amdgpu_bo_free(result.buf_handle);
      return &bo->base;
   }

   /* Get initial domains. */
   r = amdgpu_bo_query_info(result.buf_handle, &info);
   if (r)
      goto error;

   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             result.alloc_size,
                             amdgpu_get_optimal_alignment(ws, result.alloc_size,
                                                          vm_alignment),
                             0, &va, &va_handle, AMDGPU_VA_RANGE_HIGH);
   if (r)
      goto error;

   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo)
      goto error;

   r = amdgpu_bo_va_op(result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);
   if (r)
      goto error;

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      initial |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      initial |= RADEON_DOMAIN_GTT;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
      flags |= RADEON_FLAG_NO_CPU_ACCESS;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
      flags |= RADEON_FLAG_GTT_WC;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_ENCRYPTED) {
      /* Imports are always possible even if the importer isn't using TMZ.
       * For instance libweston needs to import the buffer to be able to determine
       * if it can be used for scanout.
       */
      flags |= RADEON_FLAG_ENCRYPTED;
   }

   /* Initialize the structure. */
   simple_mtx_init(&bo->lock, mtx_plain);
   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment = info.phys_alignment;
   bo->bo = result.buf_handle;
   bo->base.size = result.alloc_size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->va = va;
   bo->u.real.va_handle = va_handle;
   bo->initial_domain = initial;
   bo->flags = flags;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
   bo->is_shared = true;

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align64(bo->base.size, ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size);

   amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms, &bo->u.real.kms_handle);

   amdgpu_add_buffer_to_global_list(bo);

   _mesa_hash_table_insert(ws->bo_export_table, bo->bo, bo);
   simple_mtx_unlock(&ws->bo_export_table_lock);

   return &bo->base;

error:
   simple_mtx_unlock(&ws->bo_export_table_lock);
   if (bo)
      FREE(bo);
   if (va_handle)
      amdgpu_va_range_free(va_handle);
   amdgpu_bo_free(result.buf_handle);
   return NULL;
}
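
/* Import path in a nutshell, as an illustrative sketch (dmabuf_fd is assumed
 * to be a dma-buf file descriptor received from elsewhere, and the last
 * argument is only a placeholder VM alignment):
 *
 *    struct winsys_handle h = {0};
 *    h.type = WINSYS_HANDLE_TYPE_FD;
 *    h.handle = dmabuf_fd;
 *
 *    struct pb_buffer *buf = rws->buffer_from_handle(rws, &h, 0);
 *    if (!buf)
 *       return false;   // bad handle, query failure, or VA allocation failure
 *
 * Re-importing a buffer that was already imported or exported returns the
 * existing amdgpu_winsys_bo with its reference count bumped, thanks to the
 * bo_export_table lookup above.
 */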

static bool amdgpu_bo_get_handle(struct radeon_winsys *rws,
                                 struct pb_buffer *buffer,
                                 struct winsys_handle *whandle)
{
   struct amdgpu_screen_winsys *sws = amdgpu_screen_winsys(rws);
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buffer);
   struct amdgpu_winsys *ws = bo->ws;
   enum amdgpu_bo_handle_type type;
   struct hash_entry *entry;
   int r;

   /* Don't allow exports of slab entries and sparse buffers. */
   if (!bo->bo)
      return false;

   bo->u.real.use_reusable_pool = false;

   switch (whandle->type) {
   case WINSYS_HANDLE_TYPE_SHARED:
      type = amdgpu_bo_handle_type_gem_flink_name;
      break;
   case WINSYS_HANDLE_TYPE_KMS:
      if (sws->fd == ws->fd) {
         whandle->handle = bo->u.real.kms_handle;

         if (bo->is_shared)
            return true;

         goto hash_table_set;
      }

      simple_mtx_lock(&ws->sws_list_lock);
      entry = _mesa_hash_table_search(sws->kms_handles, bo);
      simple_mtx_unlock(&ws->sws_list_lock);
      if (entry) {
         whandle->handle = (uintptr_t)entry->data;
         return true;
      }
      /* Fall through */
   case WINSYS_HANDLE_TYPE_FD:
      type = amdgpu_bo_handle_type_dma_buf_fd;
      break;
   default:
      return false;
   }

   r = amdgpu_bo_export(bo->bo, type, &whandle->handle);
   if (r)
      return false;

   if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {
      int dma_fd = whandle->handle;

      r = drmPrimeFDToHandle(sws->fd, dma_fd, &whandle->handle);
      close(dma_fd);

      if (r)
         return false;

      simple_mtx_lock(&ws->sws_list_lock);
      _mesa_hash_table_insert_pre_hashed(sws->kms_handles,
                                         bo->u.real.kms_handle, bo,
                                         (void*)(uintptr_t)whandle->handle);
      simple_mtx_unlock(&ws->sws_list_lock);
   }

 hash_table_set:
   simple_mtx_lock(&ws->bo_export_table_lock);
   _mesa_hash_table_insert(ws->bo_export_table, bo->bo, bo);
   simple_mtx_unlock(&ws->bo_export_table_lock);

   bo->is_shared = true;
   return true;
}
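
/* Export path, sketched (illustrative only; error handling trimmed).  Note
 * that slab entries and sparse buffers (bo->bo == NULL) are refused above:
 *
 *    struct winsys_handle h = {0};
 *    h.type = WINSYS_HANDLE_TYPE_FD;
 *
 *    if (rws->buffer_get_handle(rws, buf, &h)) {
 *       int dmabuf_fd = h.handle;   // dma-buf fd representing the buffer
 *       // ... hand the fd to another process/API ...
 *    }
 */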

static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
                                            void *pointer, uint64_t size)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   amdgpu_bo_handle buf_handle;
   struct amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;
   /* Avoid failure when the size is not page aligned */
   uint64_t aligned_size = align64(size, ws->info.gart_page_size);

   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo)
      return NULL;

   if (amdgpu_create_bo_from_user_mem(ws->dev, pointer,
                                      aligned_size, &buf_handle))
      goto error;

   if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             aligned_size,
                             amdgpu_get_optimal_alignment(ws, aligned_size,
                                                          ws->info.gart_page_size),
                             0, &va, &va_handle, AMDGPU_VA_RANGE_HIGH))
      goto error_va_alloc;

   if (amdgpu_bo_va_op(buf_handle, 0, aligned_size, va, 0, AMDGPU_VA_OP_MAP))
      goto error_va_map;

   /* Initialize it. */
   bo->is_user_ptr = true;
   pipe_reference_init(&bo->base.reference, 1);
   simple_mtx_init(&bo->lock, mtx_plain);
   bo->bo = buf_handle;
   bo->base.alignment = 0;
   bo->base.size = size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->cpu_ptr = pointer;
   bo->va = va;
   bo->u.real.va_handle = va_handle;
   bo->initial_domain = RADEON_DOMAIN_GTT;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);

   ws->allocated_gtt += aligned_size;

   amdgpu_add_buffer_to_global_list(bo);

   amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms, &bo->u.real.kms_handle);

   return (struct pb_buffer*)bo;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error:
   FREE(bo);
   return NULL;
}
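
/* User-pointer import, sketched (illustrative; the 4096-byte page size and
 * the posix_memalign allocation are assumptions for the example).  The size
 * is rounded up to a GART page internally and the buffer ends up in
 * RADEON_DOMAIN_GTT:
 *
 *    void *staging = NULL;
 *    if (posix_memalign(&staging, 4096, size) != 0)
 *       return false;                                   // page-aligned CPU memory
 *
 *    struct pb_buffer *buf = rws->buffer_from_ptr(rws, staging, size);
 *    if (!buf)
 *       free(staging);   // import failed; 'staging' is still ordinary CPU memory
 */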

static bool amdgpu_bo_is_user_ptr(struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->is_user_ptr;
}

static bool amdgpu_bo_is_suballocated(struct pb_buffer *buf)
{
   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;

   return !bo->bo && !bo->sparse;
}

static uint64_t amdgpu_bo_get_va(struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->va;
}

void amdgpu_bo_init_functions(struct amdgpu_screen_winsys *ws)
{
   ws->base.buffer_set_metadata = amdgpu_buffer_set_metadata;
   ws->base.buffer_get_metadata = amdgpu_buffer_get_metadata;
   ws->base.buffer_map = amdgpu_bo_map;
   ws->base.buffer_unmap = amdgpu_bo_unmap;
   ws->base.buffer_wait = amdgpu_bo_wait;
   ws->base.buffer_create = amdgpu_buffer_create;
   ws->base.buffer_from_handle = amdgpu_bo_from_handle;
   ws->base.buffer_from_ptr = amdgpu_bo_from_ptr;
   ws->base.buffer_is_user_ptr = amdgpu_bo_is_user_ptr;
   ws->base.buffer_is_suballocated = amdgpu_bo_is_suballocated;
   ws->base.buffer_get_handle = amdgpu_bo_get_handle;
   ws->base.buffer_commit = amdgpu_bo_sparse_commit;
   ws->base.buffer_get_virtual_address = amdgpu_bo_get_va;
   ws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain;
   ws->base.buffer_get_flags = amdgpu_bo_get_flags;
}

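/* After amdgpu_bo_init_functions() has run, frontends reach all of the above
 * only through the radeon_winsys vtable.  A few of the query hooks wired up
 * here, in an illustrative sketch:
 *
 *    uint64_t va   = rws->buffer_get_virtual_address(buf);  // GPU virtual address
 *    bool user_ptr = rws->buffer_is_user_ptr(buf);          // created via buffer_from_ptr?
 *    bool suballoc = rws->buffer_is_suballocated(buf);      // slab entry without its own BO
 */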