• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2019 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors (Collabora):
24  *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  */
26 #include <errno.h>
27 #include <fcntl.h>
28 #include <pthread.h>
29 #include <stdio.h>
30 #include <xf86drm.h>
31 
32 #include "pan_bo.h"
33 #include "pan_device.h"
34 #include "pan_util.h"
35 #include "wrap.h"
36 
37 #include "util/os_mman.h"
38 
39 #include "util/u_inlines.h"
40 #include "util/u_math.h"
41 
/* This file implements a userspace BO cache. Allocating and freeing
 * GPU-visible buffers is very expensive, and even the extra kernel roundtrips
 * add more work than we would like at this point. So caching BOs in userspace
 * solves both of these problems and does not require kernel updates.
 *
 * Cached BOs are sorted into a bucket based on rounding their size down to the
 * nearest power-of-two. Each bucket contains a linked list of free panfrost_bo
 * objects. Putting a BO into the cache is accomplished by adding it to the
 * corresponding bucket. Getting a BO from the cache consists of finding the
 * appropriate bucket and searching it for a suitable entry. A cache eviction
 * is a kernel-level free of a BO and removing it from the bucket. We special
 * case evicting all BOs from the cache, since that's what is helpful in
 * practice and avoids extra logic around the linked list.
 */
56 
57 static uint32_t
to_kmod_bo_flags(uint32_t flags)58 to_kmod_bo_flags(uint32_t flags)
59 {
60    uint32_t kmod_bo_flags = 0;
61 
62    if (flags & PAN_BO_EXECUTE)
63       kmod_bo_flags |= PAN_KMOD_BO_FLAG_EXECUTABLE;
64    if (flags & PAN_BO_GROWABLE)
65       kmod_bo_flags |= PAN_KMOD_BO_FLAG_ALLOC_ON_FAULT;
66    if (flags & PAN_BO_INVISIBLE)
67       kmod_bo_flags |= PAN_KMOD_BO_FLAG_NO_MMAP;
68 
69    return kmod_bo_flags;
70 }
71 
72 static struct panfrost_bo *
panfrost_bo_alloc(struct panfrost_device * dev,size_t size,uint32_t flags,const char * label)73 panfrost_bo_alloc(struct panfrost_device *dev, size_t size, uint32_t flags,
74                   const char *label)
75 {
76    struct pan_kmod_vm *exclusive_vm =
77       !(flags & PAN_BO_SHAREABLE) ? dev->kmod.vm : NULL;
78    struct pan_kmod_bo *kmod_bo;
79    struct panfrost_bo *bo;
80 
81    kmod_bo = pan_kmod_bo_alloc(dev->kmod.dev, exclusive_vm, size,
82                                to_kmod_bo_flags(flags));
83 
84    if (kmod_bo == NULL)
85       goto err_alloc;
86 
87    bo = pan_lookup_bo(dev, kmod_bo->handle);
88    assert(!memcmp(bo, &((struct panfrost_bo){0}), sizeof(*bo)));
89    bo->kmod_bo = kmod_bo;
90 
91    struct pan_kmod_vm_op vm_op = {
92       .type = PAN_KMOD_VM_OP_TYPE_MAP,
93       .va =
94          {
95             .start = PAN_KMOD_VM_MAP_AUTO_VA,
96             .size = bo->kmod_bo->size,
97          },
98       .map =
99          {
100             .bo = bo->kmod_bo,
101             .bo_offset = 0,
102          },
103    };
104 
105    int ret =
106       pan_kmod_vm_bind(dev->kmod.vm, PAN_KMOD_VM_OP_MODE_IMMEDIATE, &vm_op, 1);
107 
108    if (ret)
109       goto err_bind;
110 
111    bo->ptr.gpu = vm_op.va.start;
112    bo->flags = flags;
113    bo->dev = dev;
114    bo->label = label;
115    return bo;
116 err_bind:
117    pan_kmod_bo_put(kmod_bo);
118    /* BO will be freed with the sparse array, but zero to indicate free */
119    memset(bo, 0, sizeof(*bo));
120 err_alloc:
121    return NULL;
122 }
123 
124 static void
panfrost_bo_free(struct panfrost_bo * bo)125 panfrost_bo_free(struct panfrost_bo *bo)
126 {
127    struct pan_kmod_bo *kmod_bo = bo->kmod_bo;
128    struct pan_kmod_vm *vm = bo->dev->kmod.vm;
129    uint64_t gpu_va = bo->ptr.gpu;
130 
131    /* BO will be freed with the sparse array, but zero to indicate free */
132    memset(bo, 0, sizeof(*bo));
133 
134    struct pan_kmod_vm_op vm_op = {
135       .type = PAN_KMOD_VM_OP_TYPE_UNMAP,
136       .va =
137          {
138             .start = gpu_va,
139             .size = kmod_bo->size,
140          },
141    };
142 
143    ASSERTED int ret = pan_kmod_vm_bind(
144       vm, PAN_KMOD_VM_OP_MODE_DEFER_TO_NEXT_IDLE_POINT, &vm_op, 1);
145    assert(!ret);
146 
147    pan_kmod_bo_put(kmod_bo);
148 }
149 
150 /* Returns true if the BO is ready, false otherwise.
151  * access_type is encoding the type of access one wants to ensure is done.
152  * Waiting is always done for writers, but if wait_readers is set then readers
153  * are also waited for.
154  */
155 bool
panfrost_bo_wait(struct panfrost_bo * bo,int64_t timeout_ns,bool wait_readers)156 panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, bool wait_readers)
157 {
158    /* If the BO has been exported or imported we can't rely on the cached
159     * state, we need to call the WAIT_BO ioctl.
160     */
161    if (!(bo->flags & PAN_BO_SHARED)) {
162       /* If ->gpu_access is 0, the BO is idle, no need to wait. */
163       if (!bo->gpu_access)
164          return true;
165 
166       /* If the caller only wants to wait for writers and no
167        * writes are pending, we don't have to wait.
168        */
169       if (!wait_readers && !(bo->gpu_access & PAN_BO_ACCESS_WRITE))
170          return true;
171    }
172 
173    if (pan_kmod_bo_wait(bo->kmod_bo, timeout_ns, !wait_readers)) {
174       /* Set gpu_access to 0 so that the next call to bo_wait()
175        * doesn't have to call the WAIT_BO ioctl.
176        */
177       bo->gpu_access = 0;
178       return true;
179    }
180 
181    return false;
182 }
183 
184 /* Helper to calculate the bucket index of a BO */
185 
186 static unsigned
pan_bucket_index(unsigned size)187 pan_bucket_index(unsigned size)
188 {
189    /* Round down to POT to compute a bucket index */
190 
191    unsigned bucket_index = util_logbase2(size);
192 
193    /* Clamp the bucket index; all huge allocations will be
194     * sorted into the largest bucket */
195 
196    bucket_index = CLAMP(bucket_index, MIN_BO_CACHE_BUCKET, MAX_BO_CACHE_BUCKET);
197 
198    /* Reindex from 0 */
199    return (bucket_index - MIN_BO_CACHE_BUCKET);
200 }
201 
202 static struct list_head *
pan_bucket(struct panfrost_device * dev,unsigned size)203 pan_bucket(struct panfrost_device *dev, unsigned size)
204 {
205    return &dev->bo_cache.buckets[pan_bucket_index(size)];
206 }
207 
208 /* Tries to fetch a BO of sufficient size with the appropriate flags from the
209  * BO cache. If it succeeds, it returns that BO and removes the BO from the
210  * cache. If it fails, it returns NULL signaling the caller to allocate a new
211  * BO. */
212 
213 static struct panfrost_bo *
panfrost_bo_cache_fetch(struct panfrost_device * dev,size_t size,uint32_t flags,const char * label,bool dontwait)214 panfrost_bo_cache_fetch(struct panfrost_device *dev, size_t size,
215                         uint32_t flags, const char *label, bool dontwait)
216 {
217    pthread_mutex_lock(&dev->bo_cache.lock);
218    struct list_head *bucket = pan_bucket(dev, size);
219    struct panfrost_bo *bo = NULL;
220 
221    /* Iterate the bucket looking for something suitable */
222    list_for_each_entry_safe(struct panfrost_bo, entry, bucket, bucket_link) {
223       if (panfrost_bo_size(entry) < size || entry->flags != flags)
224          continue;
225 
226       /* If the oldest BO in the cache is busy, likely so is
227        * everything newer, so bail. */
228       if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX, true))
229          break;
230 
231       /* This one works, splice it out of the cache */
232       list_del(&entry->bucket_link);
233       list_del(&entry->lru_link);
234 
235       if (!pan_kmod_bo_make_unevictable(entry->kmod_bo)) {
236          panfrost_bo_free(entry);
237          continue;
238       }
239       /* Let's go! */
240       bo = entry;
241       bo->label = label;
242       break;
243    }
244    pthread_mutex_unlock(&dev->bo_cache.lock);
245 
246    return bo;
247 }
248 
249 static void
panfrost_bo_cache_evict_stale_bos(struct panfrost_device * dev)250 panfrost_bo_cache_evict_stale_bos(struct panfrost_device *dev)
251 {
252    struct timespec time;
253 
254    clock_gettime(CLOCK_MONOTONIC, &time);
255    list_for_each_entry_safe(struct panfrost_bo, entry, &dev->bo_cache.lru,
256                             lru_link) {
257       /* We want all entries that have been used more than 1 sec
258        * ago to be dropped, others can be kept.
259        * Note the <= 2 check and not <= 1. It's here to account for
260        * the fact that we're only testing ->tv_sec, not ->tv_nsec.
261        * That means we might keep entries that are between 1 and 2
262        * seconds old, but we don't really care, as long as unused BOs
263        * are dropped at some point.
264        */
265       if (time.tv_sec - entry->last_used <= 2)
266          break;
267 
268       list_del(&entry->bucket_link);
269       list_del(&entry->lru_link);
270       panfrost_bo_free(entry);
271    }
272 }
273 
274 /* Tries to add a BO to the cache. Returns if it was
275  * successful */
276 
277 static bool
panfrost_bo_cache_put(struct panfrost_bo * bo)278 panfrost_bo_cache_put(struct panfrost_bo *bo)
279 {
280    struct panfrost_device *dev = bo->dev;
281 
282    if (bo->flags & PAN_BO_SHARED || dev->debug & PAN_DBG_NO_CACHE)
283       return false;
284 
285    /* Must be first */
286    pthread_mutex_lock(&dev->bo_cache.lock);
287 
288    struct list_head *bucket = pan_bucket(dev, MAX2(panfrost_bo_size(bo), 4096));
289    struct timespec time;
290 
291    pan_kmod_bo_make_evictable(bo->kmod_bo);
292 
293    /* Add us to the bucket */
294    list_addtail(&bo->bucket_link, bucket);
295 
296    /* Add us to the LRU list and update the last_used field. */
297    list_addtail(&bo->lru_link, &dev->bo_cache.lru);
298    clock_gettime(CLOCK_MONOTONIC, &time);
299    bo->last_used = time.tv_sec;
300 
301    /* Let's do some cleanup in the BO cache while we hold the
302     * lock.
303     */
304    panfrost_bo_cache_evict_stale_bos(dev);
305 
306    /* Update the label to help debug BO cache memory usage issues */
307    bo->label = "Unused (BO cache)";
308 
309    /* Must be last */
310    pthread_mutex_unlock(&dev->bo_cache.lock);
311    return true;
312 }
313 
314 /* Evicts all BOs from the cache. Called during context
315  * destroy or during low-memory situations (to free up
316  * memory that may be unused by us just sitting in our
317  * cache, but still reserved from the perspective of the
318  * OS) */
319 
320 void
panfrost_bo_cache_evict_all(struct panfrost_device * dev)321 panfrost_bo_cache_evict_all(struct panfrost_device *dev)
322 {
323    pthread_mutex_lock(&dev->bo_cache.lock);
324    for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) {
325       struct list_head *bucket = &dev->bo_cache.buckets[i];
326 
327       list_for_each_entry_safe(struct panfrost_bo, entry, bucket, bucket_link) {
328          list_del(&entry->bucket_link);
329          list_del(&entry->lru_link);
330          panfrost_bo_free(entry);
331       }
332    }
333    pthread_mutex_unlock(&dev->bo_cache.lock);
334 }
335 
336 int
panfrost_bo_mmap(struct panfrost_bo * bo)337 panfrost_bo_mmap(struct panfrost_bo *bo)
338 {
339    if (bo->ptr.cpu)
340       return 0;
341 
342    bo->ptr.cpu = pan_kmod_bo_mmap(bo->kmod_bo, 0, panfrost_bo_size(bo),
343                                   PROT_READ | PROT_WRITE, MAP_SHARED, NULL);
344    if (bo->ptr.cpu == MAP_FAILED) {
345       bo->ptr.cpu = NULL;
346       return -1;
347    }
348 
349    return 0;
350 }
351 
352 static void
panfrost_bo_munmap(struct panfrost_bo * bo)353 panfrost_bo_munmap(struct panfrost_bo *bo)
354 {
355    if (!bo->ptr.cpu)
356       return;
357 
358    if (os_munmap((void *)(uintptr_t)bo->ptr.cpu, panfrost_bo_size(bo))) {
359       mesa_loge("munmap failed: %s", strerror(errno));
360       abort();
361    }
362 
363    bo->ptr.cpu = NULL;
364 }
365 
366 struct panfrost_bo *
panfrost_bo_create(struct panfrost_device * dev,size_t size,uint32_t flags,const char * label)367 panfrost_bo_create(struct panfrost_device *dev, size_t size, uint32_t flags,
368                    const char *label)
369 {
370    struct panfrost_bo *bo;
371 
372    if (dev->debug & PAN_DBG_DUMP) {
373       /* Make sure to CPU-map all BOs except growable ones, so that
374          we can dump them when PAN_MESA_DEBUG=dump. */
375       if (!(flags & PAN_BO_GROWABLE)) {
376          flags &= ~PAN_BO_INVISIBLE;
377       }
378       flags &= ~PAN_BO_DELAY_MMAP;
379    }
380    /* Kernel will fail (confusingly) with EPERM otherwise */
381    assert(size > 0);
382 
383    /* To maximize BO cache usage, don't allocate tiny BOs */
384    size = ALIGN_POT(size, 4096);
385 
386    /* GROWABLE BOs cannot be mmapped */
387    if (flags & PAN_BO_GROWABLE)
388       assert(flags & PAN_BO_INVISIBLE);
389 
390    /* Ideally, we get a BO that's ready in the cache, or allocate a fresh
391     * BO. If allocation fails, we can try waiting for something in the
392     * cache. But if there's no nothing suitable, we should flush the cache
393     * to make space for the new allocation.
394     */
395    bo = panfrost_bo_cache_fetch(dev, size, flags, label, true);
396    if (!bo)
397       bo = panfrost_bo_alloc(dev, size, flags, label);
398    if (!bo)
399       bo = panfrost_bo_cache_fetch(dev, size, flags, label, false);
400    if (!bo) {
401       panfrost_bo_cache_evict_all(dev);
402       bo = panfrost_bo_alloc(dev, size, flags, label);
403    }
404 
405    if (!bo)
406       return NULL;
407 
408    /* Only mmap now if we know we need to. For CPU-invisible buffers, we
409     * never map since we don't care about their contents; they're purely
410     * for GPU-internal use. But we do trace them anyway. */
411 
412    if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP))) {
413       if (panfrost_bo_mmap(bo)) {
414          panfrost_bo_free(bo);
415          return NULL;
416       }
417    }
418 
419    p_atomic_set(&bo->refcnt, 1);
420 
421    if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
422       if (flags & PAN_BO_INVISIBLE)
423          pandecode_inject_mmap(dev->decode_ctx, bo->ptr.gpu, NULL,
424                                panfrost_bo_size(bo), NULL);
425       else if (!(flags & PAN_BO_DELAY_MMAP))
426          pandecode_inject_mmap(dev->decode_ctx, bo->ptr.gpu, bo->ptr.cpu,
427                                panfrost_bo_size(bo), NULL);
428    }
429 
430    return bo;
431 }
432 
433 void
panfrost_bo_reference(struct panfrost_bo * bo)434 panfrost_bo_reference(struct panfrost_bo *bo)
435 {
436    if (bo) {
437       ASSERTED int count = p_atomic_inc_return(&bo->refcnt);
438       assert(count != 1);
439    }
440 }
441 
442 void
panfrost_bo_unreference(struct panfrost_bo * bo)443 panfrost_bo_unreference(struct panfrost_bo *bo)
444 {
445    if (!bo)
446       return;
447 
448    /* Don't return to cache if there are still references */
449    assert(p_atomic_read(&bo->refcnt) > 0);
450    if (p_atomic_dec_return(&bo->refcnt))
451       return;
452 
453    struct panfrost_device *dev = bo->dev;
454 
455    pthread_mutex_lock(&dev->bo_map_lock);
456 
457    /* Someone might have imported this BO while we were waiting for the
458     * lock, let's make sure it's still not referenced before freeing it.
459     */
460    if (p_atomic_read(&bo->refcnt) == 0) {
461       /* When the reference count goes to zero, we need to cleanup */
462       panfrost_bo_munmap(bo);
463 
464       if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
465          pandecode_inject_free(dev->decode_ctx, bo->ptr.gpu,
466                                panfrost_bo_size(bo));
467 
468       /* Rather than freeing the BO now, we'll cache the BO for later
469        * allocations if we're allowed to.
470        */
471       if (!panfrost_bo_cache_put(bo))
472          panfrost_bo_free(bo);
473    }
474    pthread_mutex_unlock(&dev->bo_map_lock);
475 }
476 
477 struct panfrost_bo *
panfrost_bo_import(struct panfrost_device * dev,int fd)478 panfrost_bo_import(struct panfrost_device *dev, int fd)
479 {
480    struct panfrost_bo *bo;
481    ASSERTED int ret;
482    unsigned gem_handle;
483 
484    pthread_mutex_lock(&dev->bo_map_lock);
485    ret = drmPrimeFDToHandle(dev->kmod.dev->fd, fd, &gem_handle);
486    assert(!ret);
487 
488    bo = pan_lookup_bo(dev, gem_handle);
489 
490    if (!bo->dev) {
491       bo->dev = dev;
492       bo->kmod_bo = pan_kmod_bo_import(dev->kmod.dev, fd, 0);
493 
494       struct pan_kmod_vm_op vm_op = {
495          .type = PAN_KMOD_VM_OP_TYPE_MAP,
496          .va =
497             {
498                .start = PAN_KMOD_VM_MAP_AUTO_VA,
499                .size = bo->kmod_bo->size,
500             },
501          .map =
502             {
503                .bo = bo->kmod_bo,
504                .bo_offset = 0,
505             },
506       };
507 
508       ASSERTED int ret = pan_kmod_vm_bind(
509          dev->kmod.vm, PAN_KMOD_VM_OP_MODE_IMMEDIATE, &vm_op, 1);
510       assert(!ret);
511 
512       bo->ptr.gpu = vm_op.va.start;
513       bo->flags = PAN_BO_SHARED;
514       p_atomic_set(&bo->refcnt, 1);
515 
516       /* mmap imported BOs when PAN_MESA_DEBUG=dump */
517       if ((dev->debug & PAN_DBG_DUMP) && panfrost_bo_mmap(bo))
518          mesa_loge("failed to mmap");
519    } else {
520       /* bo->refcnt == 0 can happen if the BO
521        * was being released but panfrost_bo_import() acquired the
522        * lock before panfrost_bo_unreference(). In that case, refcnt
523        * is 0 and we can't use panfrost_bo_reference() directly, we
524        * have to re-initialize the refcnt().
525        * Note that panfrost_bo_unreference() checks
526        * refcnt value just after acquiring the lock to
527        * make sure the object is not freed if panfrost_bo_import()
528        * acquired it in the meantime.
529        */
530       if (p_atomic_read(&bo->refcnt) == 0)
531          p_atomic_set(&bo->refcnt, 1);
532       else
533          panfrost_bo_reference(bo);
534    }
535    pthread_mutex_unlock(&dev->bo_map_lock);
536 
537    return bo;
538 }
539 
540 int
panfrost_bo_export(struct panfrost_bo * bo)541 panfrost_bo_export(struct panfrost_bo *bo)
542 {
543    int ret = pan_kmod_bo_export(bo->kmod_bo);
544    if (ret >= 0)
545       bo->flags |= PAN_BO_SHARED;
546 
547    return ret;
548 }
549 
550 struct panfrost_bo *
panfrost_bo_from_kmod_bo(struct panfrost_device * dev,struct pan_kmod_bo * kmod_bo)551 panfrost_bo_from_kmod_bo(struct panfrost_device *dev,
552                          struct pan_kmod_bo *kmod_bo)
553 {
554    if (!kmod_bo)
555       return NULL;
556 
557    struct panfrost_bo *bo = pan_lookup_bo(dev, pan_kmod_bo_handle(kmod_bo));
558    assert(bo->kmod_bo == kmod_bo);
559 
560    return bo;
561 }
562