1 /*
2 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
3 * Copyright © 2015 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
16 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
18 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * The above copyright notice and this permission notice (including the
24 * next paragraph) shall be included in all copies or substantial portions
25 * of the Software.
26 */
27 /*
28 * Authors:
29 * Marek Olšák <maraeo@gmail.com>
30 */
31
32 #include "amdgpu_cs.h"
33
34 #include "os/os_time.h"
35 #include "state_tracker/drm_driver.h"
36 #include <amdgpu_drm.h>
37 #include <xf86drm.h>
38 #include <stdio.h>
39 #include <inttypes.h>
40
/* Forward declaration: amdgpu_bo_slab_alloc() calls amdgpu_bo_create() before
 * the definition, which appears further down in this file.
 */
static struct pb_buffer *
amdgpu_bo_create(struct radeon_winsys *rws,
                 uint64_t size,
                 unsigned alignment,
                 enum radeon_bo_domain domain,
                 enum radeon_bo_flag flags);
47
/**
 * Check whether a buffer is idle, optionally waiting for it.
 *
 * \param _buf     buffer to query
 * \param timeout  0 performs a non-blocking check; any other value is
 *                 converted to an absolute deadline with
 *                 os_time_get_absolute_timeout()
 * \param usage    not read by this implementation
 * \return true if the buffer was found idle, false if it is still busy or
 *         a wait did not succeed
 */
static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
                           enum radeon_bo_usage usage)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_winsys *ws = bo->ws;
   /* Only assigned (and only read) on the timeout != 0 paths. */
   int64_t abs_timeout;

   if (timeout == 0) {
      /* Non-blocking query: a non-zero ioctl counter means "busy". */
      if (p_atomic_read(&bo->num_active_ioctls))
         return false;

   } else {
      abs_timeout = os_time_get_absolute_timeout(timeout);

      /* Wait if any ioctl is being submitted with this buffer. */
      if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
         return false;
   }

   if (bo->is_shared) {
      /* We can't use user fences for shared buffers, because user fences
       * are local to this process only. If we want to wait for all buffer
       * uses in all processes, we have to use amdgpu_bo_wait_for_idle.
       */
      /* Defaults to "busy" so that a failed query reports the buffer as
       * not idle.
       */
      bool buffer_busy = true;
      int r;

      /* Note: the relative timeout is passed here, not abs_timeout. */
      r = amdgpu_bo_wait_for_idle(bo->bo, timeout, &buffer_busy);
      if (r)
         fprintf(stderr, "%s: amdgpu_bo_wait_for_idle failed %i\n", __func__,
                 r);
      return !buffer_busy;
   }

   if (timeout == 0) {
      unsigned idle_fences;
      bool buffer_idle;

      pipe_mutex_lock(ws->bo_fence_lock);

      /* Count the leading fences for which amdgpu_fence_wait() with a zero
       * timeout succeeds; stop at the first one for which it fails.
       */
      for (idle_fences = 0; idle_fences < bo->num_fences; ++idle_fences) {
         if (!amdgpu_fence_wait(bo->fences[idle_fences], 0, false))
            break;
      }

      /* Release the idle fences to avoid checking them again later. */
      for (unsigned i = 0; i < idle_fences; ++i)
         amdgpu_fence_reference(&bo->fences[i], NULL);

      /* Compact the array so the remaining fences start at index 0. */
      memmove(&bo->fences[0], &bo->fences[idle_fences],
              (bo->num_fences - idle_fences) * sizeof(*bo->fences));
      bo->num_fences -= idle_fences;

      buffer_idle = !bo->num_fences;
      pipe_mutex_unlock(ws->bo_fence_lock);

      return buffer_idle;
   } else {
      bool buffer_idle = true;

      pipe_mutex_lock(ws->bo_fence_lock);
      /* Process fences one at a time from the front of the array until
       * none are left or one of the waits fails.
       */
      while (bo->num_fences && buffer_idle) {
         struct pipe_fence_handle *fence = NULL;
         bool fence_idle = false;

         /* Take a private reference so the fence stays valid while the
          * lock is dropped.
          */
         amdgpu_fence_reference(&fence, bo->fences[0]);

         /* Wait for the fence. */
         pipe_mutex_unlock(ws->bo_fence_lock);
         /* NOTE(review): the last argument presumably selects an absolute
          * timeout, matching abs_timeout; confirm against amdgpu_fence_wait.
          */
         if (amdgpu_fence_wait(fence, abs_timeout, true))
            fence_idle = true;
         else
            buffer_idle = false;
         pipe_mutex_lock(ws->bo_fence_lock);

         /* Release an idle fence to avoid checking it again later, keeping in
          * mind that the fence array may have been modified by other threads.
          */
         if (fence_idle && bo->num_fences && bo->fences[0] == fence) {
            amdgpu_fence_reference(&bo->fences[0], NULL);
            memmove(&bo->fences[0], &bo->fences[1],
                    (bo->num_fences - 1) * sizeof(*bo->fences));
            bo->num_fences--;
         }

         /* Drop the private reference taken above. */
         amdgpu_fence_reference(&fence, NULL);
      }
      pipe_mutex_unlock(ws->bo_fence_lock);

      return buffer_idle;
   }
}
140
amdgpu_bo_get_initial_domain(struct pb_buffer * buf)141 static enum radeon_bo_domain amdgpu_bo_get_initial_domain(
142 struct pb_buffer *buf)
143 {
144 return ((struct amdgpu_winsys_bo*)buf)->initial_domain;
145 }
146
amdgpu_bo_remove_fences(struct amdgpu_winsys_bo * bo)147 static void amdgpu_bo_remove_fences(struct amdgpu_winsys_bo *bo)
148 {
149 for (unsigned i = 0; i < bo->num_fences; ++i)
150 amdgpu_fence_reference(&bo->fences[i], NULL);
151
152 FREE(bo->fences);
153 bo->num_fences = 0;
154 bo->max_fences = 0;
155 }
156
amdgpu_bo_destroy(struct pb_buffer * _buf)157 void amdgpu_bo_destroy(struct pb_buffer *_buf)
158 {
159 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
160
161 assert(bo->bo && "must not be called for slab entries");
162
163 pipe_mutex_lock(bo->ws->global_bo_list_lock);
164 LIST_DEL(&bo->u.real.global_list_item);
165 bo->ws->num_buffers--;
166 pipe_mutex_unlock(bo->ws->global_bo_list_lock);
167
168 amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
169 amdgpu_va_range_free(bo->u.real.va_handle);
170 amdgpu_bo_free(bo->bo);
171
172 amdgpu_bo_remove_fences(bo);
173
174 if (bo->initial_domain & RADEON_DOMAIN_VRAM)
175 bo->ws->allocated_vram -= align64(bo->base.size, bo->ws->info.gart_page_size);
176 else if (bo->initial_domain & RADEON_DOMAIN_GTT)
177 bo->ws->allocated_gtt -= align64(bo->base.size, bo->ws->info.gart_page_size);
178
179 if (bo->u.real.map_count >= 1) {
180 if (bo->initial_domain & RADEON_DOMAIN_VRAM)
181 bo->ws->mapped_vram -= bo->base.size;
182 else if (bo->initial_domain & RADEON_DOMAIN_GTT)
183 bo->ws->mapped_gtt -= bo->base.size;
184 }
185
186 FREE(bo);
187 }
188
amdgpu_bo_destroy_or_cache(struct pb_buffer * _buf)189 static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf)
190 {
191 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
192
193 assert(bo->bo); /* slab buffers have a separate vtbl */
194
195 if (bo->u.real.use_reusable_pool)
196 pb_cache_add_buffer(&bo->u.real.cache_entry);
197 else
198 amdgpu_bo_destroy(_buf);
199 }
200
/**
 * Map a buffer into the CPU address space.
 *
 * Unless PIPE_TRANSFER_UNSYNCHRONIZED is set, the function first deals with
 * pending GPU work: with PIPE_TRANSFER_DONTBLOCK it returns NULL instead of
 * waiting; otherwise it flushes the given command stream if it references
 * the buffer and then waits with an infinite timeout.
 *
 * \param buf    buffer to map (real buffer or slab entry)
 * \param rcs    command stream that may reference the buffer; may be NULL
 * \param usage  PIPE_TRANSFER_* flags
 * \return CPU pointer to the start of the buffer, or NULL if the buffer is
 *         busy (DONTBLOCK) or the mapping failed
 */
static void *amdgpu_bo_map(struct pb_buffer *buf,
                           struct radeon_winsys_cs *rcs,
                           enum pipe_transfer_usage usage)
{
   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
   struct amdgpu_winsys_bo *real;
   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs;
   int r;
   void *cpu = NULL;
   /* Byte offset of a slab entry inside its backing buffer; 0 for real BOs. */
   uint64_t offset = 0;

   /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
   if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
      /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
      if (usage & PIPE_TRANSFER_DONTBLOCK) {
         if (!(usage & PIPE_TRANSFER_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
                                                               RADEON_USAGE_WRITE)) {
               /* Start an asynchronous flush and report "busy" now. */
               cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
               return NULL;
            }

            if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
                                RADEON_USAGE_WRITE)) {
               return NULL;
            }
         } else {
            /* Mapping for write: any reference by the CS counts. */
            if (cs && amdgpu_bo_is_referenced_by_cs(cs, bo)) {
               cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
               return NULL;
            }

            if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
                                RADEON_USAGE_READWRITE)) {
               return NULL;
            }
         }
      } else {
         /* Blocking path; the elapsed time is added to buffer_wait_time. */
         uint64_t time = os_time_get_nano();

         if (!(usage & PIPE_TRANSFER_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs) {
               if (amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
                                                            RADEON_USAGE_WRITE)) {
                  cs->flush_cs(cs->flush_data, 0, NULL);
               } else {
                  /* Try to avoid busy-waiting in amdgpu_bo_wait. */
                  if (p_atomic_read(&bo->num_active_ioctls))
                     amdgpu_cs_sync_flush(rcs);
               }
            }

            amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_WRITE);
         } else {
            /* Mapping for write. */
            if (cs) {
               if (amdgpu_bo_is_referenced_by_cs(cs, bo)) {
                  cs->flush_cs(cs->flush_data, 0, NULL);
               } else {
                  /* Try to avoid busy-waiting in amdgpu_bo_wait. */
                  if (p_atomic_read(&bo->num_active_ioctls))
                     amdgpu_cs_sync_flush(rcs);
               }
            }

            amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_READWRITE);
         }

         bo->ws->buffer_wait_time += os_time_get_nano() - time;
      }
   }

   /* If the buffer is created from user memory, return the user pointer. */
   if (bo->user_ptr)
      return bo->user_ptr;

   /* Slab entries have no handle of their own (bo->bo is NULL); map the
    * backing buffer and remember where this entry starts inside it.
    */
   if (bo->bo) {
      real = bo;
   } else {
      real = bo->u.slab.real;
      offset = bo->va - real->va;
   }

   r = amdgpu_bo_cpu_map(real->bo, &cpu);
   if (r) {
      /* Clear the cache and try again. */
      pb_cache_release_all_buffers(&real->ws->bo_cache);
      r = amdgpu_bo_cpu_map(real->bo, &cpu);
      if (r)
         return NULL;
   }

   /* Account the mapped size only when map_count goes from 0 to 1. */
   if (p_atomic_inc_return(&real->u.real.map_count) == 1) {
      if (real->initial_domain & RADEON_DOMAIN_VRAM)
         real->ws->mapped_vram += real->base.size;
      else if (real->initial_domain & RADEON_DOMAIN_GTT)
         real->ws->mapped_gtt += real->base.size;
   }
   return (uint8_t*)cpu + offset;
}
317
amdgpu_bo_unmap(struct pb_buffer * buf)318 static void amdgpu_bo_unmap(struct pb_buffer *buf)
319 {
320 struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
321 struct amdgpu_winsys_bo *real;
322
323 if (bo->user_ptr)
324 return;
325
326 real = bo->bo ? bo : bo->u.slab.real;
327
328 if (p_atomic_dec_zero(&real->u.real.map_count)) {
329 if (real->initial_domain & RADEON_DOMAIN_VRAM)
330 real->ws->mapped_vram -= real->base.size;
331 else if (real->initial_domain & RADEON_DOMAIN_GTT)
332 real->ws->mapped_gtt -= real->base.size;
333 }
334
335 amdgpu_bo_cpu_unmap(real->bo);
336 }
337
/* Vtable installed on real (non-slab) buffers. Only the first member is
 * initialized, with amdgpu_bo_destroy_or_cache.
 */
static const struct pb_vtbl amdgpu_winsys_bo_vtbl = {
   amdgpu_bo_destroy_or_cache
   /* other functions are never called */
};
342
amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo * bo)343 static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo)
344 {
345 struct amdgpu_winsys *ws = bo->ws;
346
347 assert(bo->bo);
348
349 pipe_mutex_lock(ws->global_bo_list_lock);
350 LIST_ADDTAIL(&bo->u.real.global_list_item, &ws->global_bo_list);
351 ws->num_buffers++;
352 pipe_mutex_unlock(ws->global_bo_list_lock);
353 }
354
/**
 * Allocate a new real buffer: kernel buffer object, GPU virtual address
 * range and the mapping between the two.
 *
 * \param ws               winsys the buffer belongs to
 * \param size             allocation size passed to amdgpu_bo_alloc()
 * \param alignment        physical and virtual address alignment
 * \param usage            value stored in base.usage
 * \param initial_domain   RADEON_DOMAIN_* bits; must include VRAM and/or GTT
 * \param flags            RADEON_FLAG_* bits translated to AMDGPU_GEM_CREATE_*
 * \param pb_cache_bucket  bucket index passed to pb_cache_init_entry()
 * \return the new buffer with a reference count of 1, or NULL on failure
 */
static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
                                                 uint64_t size,
                                                 unsigned alignment,
                                                 unsigned usage,
                                                 enum radeon_bo_domain initial_domain,
                                                 unsigned flags,
                                                 unsigned pb_cache_bucket)
{
   struct amdgpu_bo_alloc_request request = {0};
   amdgpu_bo_handle buf_handle;
   uint64_t va = 0;
   struct amdgpu_winsys_bo *bo;
   amdgpu_va_handle va_handle;
   unsigned va_gap_size;
   int r;

   assert(initial_domain & RADEON_DOMAIN_VRAM_GTT);
   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo) {
      return NULL;
   }

   pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, &bo->base,
                       pb_cache_bucket);
   request.alloc_size = size;
   request.phys_alignment = alignment;

   /* Translate winsys domains and flags to their kernel equivalents. */
   if (initial_domain & RADEON_DOMAIN_VRAM)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
   if (initial_domain & RADEON_DOMAIN_GTT)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;

   if (flags & RADEON_FLAG_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
   if (flags & RADEON_FLAG_GTT_WC)
      request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;

   r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
   if (r) {
      fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
      fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size);
      fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment);
      fprintf(stderr, "amdgpu: domains : %u\n", initial_domain);
      goto error_bo_alloc;
   }

   /* With ws->check_vm set, reserve extra address space past the buffer;
    * only `size` bytes are actually mapped below.
    * NOTE(review): presumably this gap makes out-of-bounds GPU accesses
    * fault instead of hitting a neighbouring buffer - confirm.
    */
   va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             size + va_gap_size, alignment, 0, &va, &va_handle, 0);
   if (r)
      goto error_va_alloc;

   r = amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP);
   if (r)
      goto error_va_map;

   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment = alignment;
   bo->base.usage = usage;
   bo->base.size = size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->bo = buf_handle;
   bo->va = va;
   bo->u.real.va_handle = va_handle;
   bo->initial_domain = initial_domain;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);

   /* Account the page-aligned size; VRAM wins if both domains are set. */
   if (initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align64(size, ws->info.gart_page_size);
   else if (initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align64(size, ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);

   return bo;

   /* Cleanup labels release resources in reverse order of acquisition. */
error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error_bo_alloc:
   FREE(bo);
   return NULL;
}
444
amdgpu_bo_can_reclaim(struct pb_buffer * _buf)445 bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf)
446 {
447 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
448
449 if (amdgpu_bo_is_referenced_by_any_cs(bo)) {
450 return false;
451 }
452
453 return amdgpu_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
454 }
455
amdgpu_bo_can_reclaim_slab(void * priv,struct pb_slab_entry * entry)456 bool amdgpu_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
457 {
458 struct amdgpu_winsys_bo *bo = NULL; /* fix container_of */
459 bo = container_of(entry, bo, u.slab.entry);
460
461 return amdgpu_bo_can_reclaim(&bo->base);
462 }
463
amdgpu_bo_slab_destroy(struct pb_buffer * _buf)464 static void amdgpu_bo_slab_destroy(struct pb_buffer *_buf)
465 {
466 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
467
468 assert(!bo->bo);
469
470 pb_slab_free(&bo->ws->bo_slabs, &bo->u.slab.entry);
471 }
472
/* Vtable installed on slab entries. Only the first member is initialized,
 * with amdgpu_bo_slab_destroy.
 */
static const struct pb_vtbl amdgpu_winsys_bo_slab_vtbl = {
   amdgpu_bo_slab_destroy
   /* other functions are never called */
};
477
amdgpu_bo_slab_alloc(void * priv,unsigned heap,unsigned entry_size,unsigned group_index)478 struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap,
479 unsigned entry_size,
480 unsigned group_index)
481 {
482 struct amdgpu_winsys *ws = priv;
483 struct amdgpu_slab *slab = CALLOC_STRUCT(amdgpu_slab);
484 enum radeon_bo_domain domains;
485 enum radeon_bo_flag flags = 0;
486 uint32_t base_id;
487
488 if (!slab)
489 return NULL;
490
491 if (heap & 1)
492 flags |= RADEON_FLAG_GTT_WC;
493 if (heap & 2)
494 flags |= RADEON_FLAG_CPU_ACCESS;
495
496 switch (heap >> 2) {
497 case 0:
498 domains = RADEON_DOMAIN_VRAM;
499 break;
500 default:
501 case 1:
502 domains = RADEON_DOMAIN_VRAM_GTT;
503 break;
504 case 2:
505 domains = RADEON_DOMAIN_GTT;
506 break;
507 }
508
509 slab->buffer = amdgpu_winsys_bo(amdgpu_bo_create(&ws->base,
510 64 * 1024, 64 * 1024,
511 domains, flags));
512 if (!slab->buffer)
513 goto fail;
514
515 assert(slab->buffer->bo);
516
517 slab->base.num_entries = slab->buffer->base.size / entry_size;
518 slab->base.num_free = slab->base.num_entries;
519 slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
520 if (!slab->entries)
521 goto fail_buffer;
522
523 LIST_INITHEAD(&slab->base.free);
524
525 base_id = __sync_fetch_and_add(&ws->next_bo_unique_id, slab->base.num_entries);
526
527 for (unsigned i = 0; i < slab->base.num_entries; ++i) {
528 struct amdgpu_winsys_bo *bo = &slab->entries[i];
529
530 bo->base.alignment = entry_size;
531 bo->base.usage = slab->buffer->base.usage;
532 bo->base.size = entry_size;
533 bo->base.vtbl = &amdgpu_winsys_bo_slab_vtbl;
534 bo->ws = ws;
535 bo->va = slab->buffer->va + i * entry_size;
536 bo->initial_domain = domains;
537 bo->unique_id = base_id + i;
538 bo->u.slab.entry.slab = &slab->base;
539 bo->u.slab.entry.group_index = group_index;
540 bo->u.slab.real = slab->buffer;
541
542 LIST_ADDTAIL(&bo->u.slab.entry.head, &slab->base.free);
543 }
544
545 return &slab->base;
546
547 fail_buffer:
548 amdgpu_winsys_bo_reference(&slab->buffer, NULL);
549 fail:
550 FREE(slab);
551 return NULL;
552 }
553
amdgpu_bo_slab_free(void * priv,struct pb_slab * pslab)554 void amdgpu_bo_slab_free(void *priv, struct pb_slab *pslab)
555 {
556 struct amdgpu_slab *slab = amdgpu_slab(pslab);
557
558 for (unsigned i = 0; i < slab->base.num_entries; ++i)
559 amdgpu_bo_remove_fences(&slab->entries[i]);
560
561 FREE(slab->entries);
562 amdgpu_winsys_bo_reference(&slab->buffer, NULL);
563 FREE(slab);
564 }
565
/* Decode a tile-split field value: 0..6 map to 64, 128, ..., 4096.
 * Any other value decodes like 4, i.e. to 1024.
 */
static unsigned eg_tile_split(unsigned tile_split)
{
   if (tile_split > 6)
      return 1024;

   return 64u << tile_split;
}
580
/* Encode a tile-split value: 64, 128, ..., 4096 map to 0..6.
 * Any other value encodes like 1024, i.e. to 4.
 */
static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   for (unsigned code = 0; code <= 6; ++code) {
      if (eg_tile_split == (64u << code))
         return code;
   }

   return 4;
}
594
amdgpu_buffer_get_metadata(struct pb_buffer * _buf,struct radeon_bo_metadata * md)595 static void amdgpu_buffer_get_metadata(struct pb_buffer *_buf,
596 struct radeon_bo_metadata *md)
597 {
598 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
599 struct amdgpu_bo_info info = {0};
600 uint32_t tiling_flags;
601 int r;
602
603 assert(bo->bo && "must not be called for slab entries");
604
605 r = amdgpu_bo_query_info(bo->bo, &info);
606 if (r)
607 return;
608
609 tiling_flags = info.metadata.tiling_info;
610
611 md->microtile = RADEON_LAYOUT_LINEAR;
612 md->macrotile = RADEON_LAYOUT_LINEAR;
613
614 if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
615 md->macrotile = RADEON_LAYOUT_TILED;
616 else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
617 md->microtile = RADEON_LAYOUT_TILED;
618
619 md->pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
620 md->bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
621 md->bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
622 md->tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
623 md->mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
624 md->num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
625 md->scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
626
627 md->size_metadata = info.metadata.size_metadata;
628 memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
629 }
630
amdgpu_buffer_set_metadata(struct pb_buffer * _buf,struct radeon_bo_metadata * md)631 static void amdgpu_buffer_set_metadata(struct pb_buffer *_buf,
632 struct radeon_bo_metadata *md)
633 {
634 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
635 struct amdgpu_bo_metadata metadata = {0};
636 uint32_t tiling_flags = 0;
637
638 assert(bo->bo && "must not be called for slab entries");
639
640 if (md->macrotile == RADEON_LAYOUT_TILED)
641 tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
642 else if (md->microtile == RADEON_LAYOUT_TILED)
643 tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
644 else
645 tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */
646
647 tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->pipe_config);
648 tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->bankw));
649 tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->bankh));
650 if (md->tile_split)
651 tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(md->tile_split));
652 tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->mtilea));
653 tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->num_banks)-1);
654
655 if (md->scanout)
656 tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
657 else
658 tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
659
660 metadata.tiling_info = tiling_flags;
661 metadata.size_metadata = md->size_metadata;
662 memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));
663
664 amdgpu_bo_set_metadata(bo->bo, &metadata);
665 }
666
/**
 * Create a buffer, preferring cheap sources over a fresh kernel allocation.
 *
 * Small requests without RADEON_FLAG_HANDLE are sub-allocated from slabs.
 * Everything else is first looked up in the buffer cache and only then
 * allocated with amdgpu_create_bo().
 *
 * \param rws        winsys
 * \param size       requested size
 * \param alignment  requested alignment
 * \param domain     RADEON_DOMAIN_* placement
 * \param flags      RADEON_FLAG_* bits
 * \return the buffer, or NULL if it could not be allocated
 */
static struct pb_buffer *
amdgpu_bo_create(struct radeon_winsys *rws,
                 uint64_t size,
                 unsigned alignment,
                 enum radeon_bo_domain domain,
                 enum radeon_bo_flag flags)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   struct amdgpu_winsys_bo *bo;
   unsigned usage = 0, pb_cache_bucket;

   /* Sub-allocate small buffers from slabs. */
   if (!(flags & RADEON_FLAG_HANDLE) &&
       size <= (1 << AMDGPU_SLAB_MAX_SIZE_LOG2) &&
       alignment <= MAX2(1 << AMDGPU_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
      struct pb_slab_entry *entry;
      /* Heap index: bit 0 = GTT_WC, bit 1 = CPU_ACCESS, domain code * 4.
       * amdgpu_bo_slab_alloc() decodes the same layout.
       */
      unsigned heap = 0;

      if (flags & RADEON_FLAG_GTT_WC)
         heap |= 1;
      if (flags & RADEON_FLAG_CPU_ACCESS)
         heap |= 2;
      /* Any other flag cannot be expressed in the heap index. */
      if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS))
         goto no_slab;

      switch (domain) {
      case RADEON_DOMAIN_VRAM:
         heap |= 0 * 4;
         break;
      case RADEON_DOMAIN_VRAM_GTT:
         heap |= 1 * 4;
         break;
      case RADEON_DOMAIN_GTT:
         heap |= 2 * 4;
         break;
      default:
         goto no_slab;
      }

      entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
      if (!entry) {
         /* Clear the cache and try again. */
         pb_cache_release_all_buffers(&ws->bo_cache);

         entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
      }
      if (!entry)
         return NULL;

      /* bo is given a defined value before container_of uses the variable
       * itself as its second argument.
       */
      bo = NULL;
      bo = container_of(entry, bo, u.slab.entry);

      pipe_reference_init(&bo->base.reference, 1);

      return &bo->base;
   }
no_slab:

   /* This flag is irrelevant for the cache. */
   flags &= ~RADEON_FLAG_HANDLE;

   /* Align size to page size. This is the minimum alignment for normal
    * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
    * like constant/uniform buffers, can benefit from better and more reuse.
    */
   size = align64(size, ws->info.gart_page_size);
   alignment = align(alignment, ws->info.gart_page_size);

   /* Only set one usage bit each for domains and flags, or the cache manager
    * might consider different sets of domains / flags compatible
    */
   if (domain == RADEON_DOMAIN_VRAM_GTT)
      usage = 1 << 2;
   else
      usage = domain >> 1;
   /* The flags value is used as a bit position, so it must fit in usage. */
   assert(flags < sizeof(usage) * 8 - 3);
   usage |= 1 << (flags + 3);

   /* Determine the pb_cache bucket for minimizing pb_cache misses. */
   pb_cache_bucket = 0;
   if (domain & RADEON_DOMAIN_VRAM) /* VRAM or VRAM+GTT */
      pb_cache_bucket += 1;
   if (flags == RADEON_FLAG_GTT_WC) /* WC */
      pb_cache_bucket += 2;
   assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));

   /* Get a buffer from the cache. */
   bo = (struct amdgpu_winsys_bo*)
        pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage,
                                pb_cache_bucket);
   if (bo)
      return &bo->base;

   /* Create a new one. */
   bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
                         pb_cache_bucket);
   if (!bo) {
      /* Clear the cache and try again. */
      pb_slabs_reclaim(&ws->bo_slabs);
      pb_cache_release_all_buffers(&ws->bo_cache);
      bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
                            pb_cache_bucket);
      if (!bo)
         return NULL;
   }

   /* Newly created buffers go back to the cache when released; see
    * amdgpu_bo_destroy_or_cache().
    */
   bo->u.real.use_reusable_pool = true;
   return &bo->base;
}
776
amdgpu_bo_from_handle(struct radeon_winsys * rws,struct winsys_handle * whandle,unsigned * stride,unsigned * offset)777 static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
778 struct winsys_handle *whandle,
779 unsigned *stride,
780 unsigned *offset)
781 {
782 struct amdgpu_winsys *ws = amdgpu_winsys(rws);
783 struct amdgpu_winsys_bo *bo;
784 enum amdgpu_bo_handle_type type;
785 struct amdgpu_bo_import_result result = {0};
786 uint64_t va;
787 amdgpu_va_handle va_handle;
788 struct amdgpu_bo_info info = {0};
789 enum radeon_bo_domain initial = 0;
790 int r;
791
792 /* Initialize the structure. */
793 bo = CALLOC_STRUCT(amdgpu_winsys_bo);
794 if (!bo) {
795 return NULL;
796 }
797
798 switch (whandle->type) {
799 case DRM_API_HANDLE_TYPE_SHARED:
800 type = amdgpu_bo_handle_type_gem_flink_name;
801 break;
802 case DRM_API_HANDLE_TYPE_FD:
803 type = amdgpu_bo_handle_type_dma_buf_fd;
804 break;
805 default:
806 return NULL;
807 }
808
809 r = amdgpu_bo_import(ws->dev, type, whandle->handle, &result);
810 if (r)
811 goto error;
812
813 /* Get initial domains. */
814 r = amdgpu_bo_query_info(result.buf_handle, &info);
815 if (r)
816 goto error_query;
817
818 r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
819 result.alloc_size, 1 << 20, 0, &va, &va_handle, 0);
820 if (r)
821 goto error_query;
822
823 r = amdgpu_bo_va_op(result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);
824 if (r)
825 goto error_va_map;
826
827 if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
828 initial |= RADEON_DOMAIN_VRAM;
829 if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
830 initial |= RADEON_DOMAIN_GTT;
831
832
833 pipe_reference_init(&bo->base.reference, 1);
834 bo->base.alignment = info.phys_alignment;
835 bo->bo = result.buf_handle;
836 bo->base.size = result.alloc_size;
837 bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
838 bo->ws = ws;
839 bo->va = va;
840 bo->u.real.va_handle = va_handle;
841 bo->initial_domain = initial;
842 bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
843 bo->is_shared = true;
844
845 if (stride)
846 *stride = whandle->stride;
847 if (offset)
848 *offset = whandle->offset;
849
850 if (bo->initial_domain & RADEON_DOMAIN_VRAM)
851 ws->allocated_vram += align64(bo->base.size, ws->info.gart_page_size);
852 else if (bo->initial_domain & RADEON_DOMAIN_GTT)
853 ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size);
854
855 amdgpu_add_buffer_to_global_list(bo);
856
857 return &bo->base;
858
859 error_va_map:
860 amdgpu_va_range_free(va_handle);
861
862 error_query:
863 amdgpu_bo_free(result.buf_handle);
864
865 error:
866 FREE(bo);
867 return NULL;
868 }
869
amdgpu_bo_get_handle(struct pb_buffer * buffer,unsigned stride,unsigned offset,unsigned slice_size,struct winsys_handle * whandle)870 static bool amdgpu_bo_get_handle(struct pb_buffer *buffer,
871 unsigned stride, unsigned offset,
872 unsigned slice_size,
873 struct winsys_handle *whandle)
874 {
875 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buffer);
876 enum amdgpu_bo_handle_type type;
877 int r;
878
879 if (!bo->bo) {
880 offset += bo->va - bo->u.slab.real->va;
881 bo = bo->u.slab.real;
882 }
883
884 bo->u.real.use_reusable_pool = false;
885
886 switch (whandle->type) {
887 case DRM_API_HANDLE_TYPE_SHARED:
888 type = amdgpu_bo_handle_type_gem_flink_name;
889 break;
890 case DRM_API_HANDLE_TYPE_FD:
891 type = amdgpu_bo_handle_type_dma_buf_fd;
892 break;
893 case DRM_API_HANDLE_TYPE_KMS:
894 type = amdgpu_bo_handle_type_kms;
895 break;
896 default:
897 return false;
898 }
899
900 r = amdgpu_bo_export(bo->bo, type, &whandle->handle);
901 if (r)
902 return false;
903
904 whandle->stride = stride;
905 whandle->offset = offset;
906 whandle->offset += slice_size * whandle->layer;
907 bo->is_shared = true;
908 return true;
909 }
910
amdgpu_bo_from_ptr(struct radeon_winsys * rws,void * pointer,uint64_t size)911 static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
912 void *pointer, uint64_t size)
913 {
914 struct amdgpu_winsys *ws = amdgpu_winsys(rws);
915 amdgpu_bo_handle buf_handle;
916 struct amdgpu_winsys_bo *bo;
917 uint64_t va;
918 amdgpu_va_handle va_handle;
919
920 bo = CALLOC_STRUCT(amdgpu_winsys_bo);
921 if (!bo)
922 return NULL;
923
924 if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
925 goto error;
926
927 if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
928 size, 1 << 12, 0, &va, &va_handle, 0))
929 goto error_va_alloc;
930
931 if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
932 goto error_va_map;
933
934 /* Initialize it. */
935 pipe_reference_init(&bo->base.reference, 1);
936 bo->bo = buf_handle;
937 bo->base.alignment = 0;
938 bo->base.size = size;
939 bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
940 bo->ws = ws;
941 bo->user_ptr = pointer;
942 bo->va = va;
943 bo->u.real.va_handle = va_handle;
944 bo->initial_domain = RADEON_DOMAIN_GTT;
945 bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
946
947 ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size);
948
949 amdgpu_add_buffer_to_global_list(bo);
950
951 return (struct pb_buffer*)bo;
952
953 error_va_map:
954 amdgpu_va_range_free(va_handle);
955
956 error_va_alloc:
957 amdgpu_bo_free(buf_handle);
958
959 error:
960 FREE(bo);
961 return NULL;
962 }
963
amdgpu_bo_is_user_ptr(struct pb_buffer * buf)964 static bool amdgpu_bo_is_user_ptr(struct pb_buffer *buf)
965 {
966 return ((struct amdgpu_winsys_bo*)buf)->user_ptr != NULL;
967 }
968
amdgpu_bo_get_va(struct pb_buffer * buf)969 static uint64_t amdgpu_bo_get_va(struct pb_buffer *buf)
970 {
971 return ((struct amdgpu_winsys_bo*)buf)->va;
972 }
973
amdgpu_bo_init_functions(struct amdgpu_winsys * ws)974 void amdgpu_bo_init_functions(struct amdgpu_winsys *ws)
975 {
976 ws->base.buffer_set_metadata = amdgpu_buffer_set_metadata;
977 ws->base.buffer_get_metadata = amdgpu_buffer_get_metadata;
978 ws->base.buffer_map = amdgpu_bo_map;
979 ws->base.buffer_unmap = amdgpu_bo_unmap;
980 ws->base.buffer_wait = amdgpu_bo_wait;
981 ws->base.buffer_create = amdgpu_bo_create;
982 ws->base.buffer_from_handle = amdgpu_bo_from_handle;
983 ws->base.buffer_from_ptr = amdgpu_bo_from_ptr;
984 ws->base.buffer_is_user_ptr = amdgpu_bo_is_user_ptr;
985 ws->base.buffer_get_handle = amdgpu_bo_get_handle;
986 ws->base.buffer_get_virtual_address = amdgpu_bo_get_va;
987 ws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain;
988 }
989