1 /*
2 * Copyright 2019 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 */
26 #include <errno.h>
27 #include <stdio.h>
28 #include <fcntl.h>
29 #include <xf86drm.h>
30 #include <pthread.h>
31 #include "drm-uapi/panfrost_drm.h"
32
33 #include "pan_bo.h"
34 #include "pan_device.h"
35 #include "pan_util.h"
36 #include "wrap.h"
37
38 #include "os/os_mman.h"
39
40 #include "util/u_inlines.h"
41 #include "util/u_math.h"
42
/* This file implements a userspace BO cache. Allocating and freeing
 * GPU-visible buffers is very expensive, and even the extra kernel roundtrips
 * add more work than we would like at this point. So caching BOs in userspace
 * solves both of these problems and does not require kernel updates.
 *
 * Cached BOs are sorted into a bucket based on rounding their size down to the
 * nearest power-of-two. Each bucket contains a linked list of free panfrost_bo
 * objects. Putting a BO into the cache is accomplished by adding it to the
 * corresponding bucket. Getting a BO from the cache consists of finding the
 * appropriate bucket and searching it for a suitable BO. A cache eviction is a
 * kernel-level free of a BO and removing it from the bucket. We special case
 * evicting all BOs from the cache, since that's what is helpful in practice
 * and avoids extra logic around the linked list.
 */
57
58 static struct panfrost_bo *
panfrost_bo_alloc(struct panfrost_device * dev,size_t size,uint32_t flags,const char * label)59 panfrost_bo_alloc(struct panfrost_device *dev, size_t size,
60 uint32_t flags, const char *label)
61 {
62 struct drm_panfrost_create_bo create_bo = { .size = size };
63 struct panfrost_bo *bo;
64 int ret;
65
66 if (dev->kernel_version->version_major > 1 ||
67 dev->kernel_version->version_minor >= 1) {
68 if (flags & PAN_BO_GROWABLE)
69 create_bo.flags |= PANFROST_BO_HEAP;
70 if (!(flags & PAN_BO_EXECUTE))
71 create_bo.flags |= PANFROST_BO_NOEXEC;
72 }
73
74 ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo);
75 if (ret) {
76 fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n");
77 return NULL;
78 }
79
80 bo = pan_lookup_bo(dev, create_bo.handle);
81 assert(!memcmp(bo, &((struct panfrost_bo){}), sizeof(*bo)));
82
83 bo->size = create_bo.size;
84 bo->ptr.gpu = create_bo.offset;
85 bo->gem_handle = create_bo.handle;
86 bo->flags = flags;
87 bo->dev = dev;
88 bo->label = label;
89 return bo;
90 }
91
92 static void
panfrost_bo_free(struct panfrost_bo * bo)93 panfrost_bo_free(struct panfrost_bo *bo)
94 {
95 struct drm_gem_close gem_close = { .handle = bo->gem_handle };
96 int ret;
97
98 ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
99 if (ret) {
100 fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n");
101 assert(0);
102 }
103
104 /* BO will be freed with the sparse array, but zero to indicate free */
105 memset(bo, 0, sizeof(*bo));
106 }
107
108 /* Returns true if the BO is ready, false otherwise.
109 * access_type is encoding the type of access one wants to ensure is done.
110 * Waiting is always done for writers, but if wait_readers is set then readers
111 * are also waited for.
112 */
113 bool
panfrost_bo_wait(struct panfrost_bo * bo,int64_t timeout_ns,bool wait_readers)114 panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, bool wait_readers)
115 {
116 struct drm_panfrost_wait_bo req = {
117 .handle = bo->gem_handle,
118 .timeout_ns = timeout_ns,
119 };
120 int ret;
121
122 /* If the BO has been exported or imported we can't rely on the cached
123 * state, we need to call the WAIT_BO ioctl.
124 */
125 if (!(bo->flags & PAN_BO_SHARED)) {
126 /* If ->gpu_access is 0, the BO is idle, no need to wait. */
127 if (!bo->gpu_access)
128 return true;
129
130 /* If the caller only wants to wait for writers and no
131 * writes are pending, we don't have to wait.
132 */
133 if (!wait_readers && !(bo->gpu_access & PAN_BO_ACCESS_WRITE))
134 return true;
135 }
136
137 /* The ioctl returns >= 0 value when the BO we are waiting for is ready
138 * -1 otherwise.
139 */
140 ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req);
141 if (ret != -1) {
142 /* Set gpu_access to 0 so that the next call to bo_wait()
143 * doesn't have to call the WAIT_BO ioctl.
144 */
145 bo->gpu_access = 0;
146 return true;
147 }
148
149 /* If errno is not ETIMEDOUT or EBUSY that means the handle we passed
150 * is invalid, which shouldn't happen here.
151 */
152 assert(errno == ETIMEDOUT || errno == EBUSY);
153 return false;
154 }
155
156 /* Helper to calculate the bucket index of a BO */
157
158 static unsigned
pan_bucket_index(unsigned size)159 pan_bucket_index(unsigned size)
160 {
161 /* Round down to POT to compute a bucket index */
162
163 unsigned bucket_index = util_logbase2(size);
164
165 /* Clamp the bucket index; all huge allocations will be
166 * sorted into the largest bucket */
167
168 bucket_index = CLAMP(bucket_index, MIN_BO_CACHE_BUCKET,
169 MAX_BO_CACHE_BUCKET);
170
171 /* Reindex from 0 */
172 return (bucket_index - MIN_BO_CACHE_BUCKET);
173 }
174
175 static struct list_head *
pan_bucket(struct panfrost_device * dev,unsigned size)176 pan_bucket(struct panfrost_device *dev, unsigned size)
177 {
178 return &dev->bo_cache.buckets[pan_bucket_index(size)];
179 }
180
181 /* Tries to fetch a BO of sufficient size with the appropriate flags from the
182 * BO cache. If it succeeds, it returns that BO and removes the BO from the
183 * cache. If it fails, it returns NULL signaling the caller to allocate a new
184 * BO. */
185
186 static struct panfrost_bo *
panfrost_bo_cache_fetch(struct panfrost_device * dev,size_t size,uint32_t flags,const char * label,bool dontwait)187 panfrost_bo_cache_fetch(struct panfrost_device *dev,
188 size_t size, uint32_t flags, const char *label,
189 bool dontwait)
190 {
191 pthread_mutex_lock(&dev->bo_cache.lock);
192 struct list_head *bucket = pan_bucket(dev, size);
193 struct panfrost_bo *bo = NULL;
194
195 /* Iterate the bucket looking for something suitable */
196 list_for_each_entry_safe(struct panfrost_bo, entry, bucket,
197 bucket_link) {
198 if (entry->size < size || entry->flags != flags)
199 continue;
200
201 /* If the oldest BO in the cache is busy, likely so is
202 * everything newer, so bail. */
203 if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX,
204 PAN_BO_ACCESS_RW))
205 break;
206
207 struct drm_panfrost_madvise madv = {
208 .handle = entry->gem_handle,
209 .madv = PANFROST_MADV_WILLNEED,
210 };
211 int ret;
212
213 /* This one works, splice it out of the cache */
214 list_del(&entry->bucket_link);
215 list_del(&entry->lru_link);
216
217 ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
218 if (!ret && !madv.retained) {
219 panfrost_bo_free(entry);
220 continue;
221 }
222 /* Let's go! */
223 bo = entry;
224 bo->label = label;
225 break;
226 }
227 pthread_mutex_unlock(&dev->bo_cache.lock);
228
229 return bo;
230 }
231
232 static void
panfrost_bo_cache_evict_stale_bos(struct panfrost_device * dev)233 panfrost_bo_cache_evict_stale_bos(struct panfrost_device *dev)
234 {
235 struct timespec time;
236
237 clock_gettime(CLOCK_MONOTONIC, &time);
238 list_for_each_entry_safe(struct panfrost_bo, entry,
239 &dev->bo_cache.lru, lru_link) {
240 /* We want all entries that have been used more than 1 sec
241 * ago to be dropped, others can be kept.
242 * Note the <= 2 check and not <= 1. It's here to account for
243 * the fact that we're only testing ->tv_sec, not ->tv_nsec.
244 * That means we might keep entries that are between 1 and 2
245 * seconds old, but we don't really care, as long as unused BOs
246 * are dropped at some point.
247 */
248 if (time.tv_sec - entry->last_used <= 2)
249 break;
250
251 list_del(&entry->bucket_link);
252 list_del(&entry->lru_link);
253 panfrost_bo_free(entry);
254 }
255 }
256
257 /* Tries to add a BO to the cache. Returns if it was
258 * successful */
259
260 static bool
panfrost_bo_cache_put(struct panfrost_bo * bo)261 panfrost_bo_cache_put(struct panfrost_bo *bo)
262 {
263 struct panfrost_device *dev = bo->dev;
264
265 if (bo->flags & PAN_BO_SHARED || dev->debug & PAN_DBG_NO_CACHE)
266 return false;
267
268 /* Must be first */
269 pthread_mutex_lock(&dev->bo_cache.lock);
270
271 struct list_head *bucket = pan_bucket(dev, MAX2(bo->size, 4096));
272 struct drm_panfrost_madvise madv;
273 struct timespec time;
274
275 madv.handle = bo->gem_handle;
276 madv.madv = PANFROST_MADV_DONTNEED;
277 madv.retained = 0;
278
279 drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
280
281 /* Add us to the bucket */
282 list_addtail(&bo->bucket_link, bucket);
283
284 /* Add us to the LRU list and update the last_used field. */
285 list_addtail(&bo->lru_link, &dev->bo_cache.lru);
286 clock_gettime(CLOCK_MONOTONIC, &time);
287 bo->last_used = time.tv_sec;
288
289 /* Let's do some cleanup in the BO cache while we hold the
290 * lock.
291 */
292 panfrost_bo_cache_evict_stale_bos(dev);
293
294 /* Update the label to help debug BO cache memory usage issues */
295 bo->label = "Unused (BO cache)";
296
297 /* Must be last */
298 pthread_mutex_unlock(&dev->bo_cache.lock);
299 return true;
300 }
301
302 /* Evicts all BOs from the cache. Called during context
303 * destroy or during low-memory situations (to free up
304 * memory that may be unused by us just sitting in our
305 * cache, but still reserved from the perspective of the
306 * OS) */
307
308 void
panfrost_bo_cache_evict_all(struct panfrost_device * dev)309 panfrost_bo_cache_evict_all(
310 struct panfrost_device *dev)
311 {
312 pthread_mutex_lock(&dev->bo_cache.lock);
313 for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) {
314 struct list_head *bucket = &dev->bo_cache.buckets[i];
315
316 list_for_each_entry_safe(struct panfrost_bo, entry, bucket,
317 bucket_link) {
318 list_del(&entry->bucket_link);
319 list_del(&entry->lru_link);
320 panfrost_bo_free(entry);
321 }
322 }
323 pthread_mutex_unlock(&dev->bo_cache.lock);
324 }
325
326 void
panfrost_bo_mmap(struct panfrost_bo * bo)327 panfrost_bo_mmap(struct panfrost_bo *bo)
328 {
329 struct drm_panfrost_mmap_bo mmap_bo = { .handle = bo->gem_handle };
330 int ret;
331
332 if (bo->ptr.cpu)
333 return;
334
335 ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo);
336 if (ret) {
337 fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");
338 assert(0);
339 }
340
341 bo->ptr.cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
342 bo->dev->fd, mmap_bo.offset);
343 if (bo->ptr.cpu == MAP_FAILED) {
344 bo->ptr.cpu = NULL;
345 fprintf(stderr,
346 "mmap failed: result=%p size=0x%llx fd=%i offset=0x%llx %m\n",
347 bo->ptr.cpu, (long long)bo->size, bo->dev->fd,
348 (long long)mmap_bo.offset);
349 }
350 }
351
352 static void
panfrost_bo_munmap(struct panfrost_bo * bo)353 panfrost_bo_munmap(struct panfrost_bo *bo)
354 {
355 if (!bo->ptr.cpu)
356 return;
357
358 if (os_munmap((void *) (uintptr_t)bo->ptr.cpu, bo->size)) {
359 perror("munmap");
360 abort();
361 }
362
363 bo->ptr.cpu = NULL;
364 }
365
366 struct panfrost_bo *
panfrost_bo_create(struct panfrost_device * dev,size_t size,uint32_t flags,const char * label)367 panfrost_bo_create(struct panfrost_device *dev, size_t size,
368 uint32_t flags, const char *label)
369 {
370 struct panfrost_bo *bo;
371
372 /* Kernel will fail (confusingly) with EPERM otherwise */
373 assert(size > 0);
374
375 /* To maximize BO cache usage, don't allocate tiny BOs */
376 size = ALIGN_POT(size, 4096);
377
378 /* GROWABLE BOs cannot be mmapped */
379 if (flags & PAN_BO_GROWABLE)
380 assert(flags & PAN_BO_INVISIBLE);
381
382 /* Before creating a BO, we first want to check the cache but without
383 * waiting for BO readiness (BOs in the cache can still be referenced
384 * by jobs that are not finished yet).
385 * If the cached allocation fails we fall back on fresh BO allocation,
386 * and if that fails too, we try one more time to allocate from the
387 * cache, but this time we accept to wait.
388 */
389 bo = panfrost_bo_cache_fetch(dev, size, flags, label, true);
390 if (!bo)
391 bo = panfrost_bo_alloc(dev, size, flags, label);
392 if (!bo)
393 bo = panfrost_bo_cache_fetch(dev, size, flags, label, false);
394
395 assert(bo);
396
397 if (!bo) {
398 fprintf(stderr, "BO creation failed\n");
399 return NULL;
400 }
401
402 /* Only mmap now if we know we need to. For CPU-invisible buffers, we
403 * never map since we don't care about their contents; they're purely
404 * for GPU-internal use. But we do trace them anyway. */
405
406 if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP)))
407 panfrost_bo_mmap(bo);
408
409 p_atomic_set(&bo->refcnt, 1);
410
411 if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
412 if (flags & PAN_BO_INVISIBLE)
413 pandecode_inject_mmap(bo->ptr.gpu, NULL, bo->size, NULL);
414 else if (!(flags & PAN_BO_DELAY_MMAP))
415 pandecode_inject_mmap(bo->ptr.gpu, bo->ptr.cpu, bo->size, NULL);
416 }
417
418 return bo;
419 }
420
421 void
panfrost_bo_reference(struct panfrost_bo * bo)422 panfrost_bo_reference(struct panfrost_bo *bo)
423 {
424 if (bo) {
425 ASSERTED int count = p_atomic_inc_return(&bo->refcnt);
426 assert(count != 1);
427 }
428 }
429
430 void
panfrost_bo_unreference(struct panfrost_bo * bo)431 panfrost_bo_unreference(struct panfrost_bo *bo)
432 {
433 if (!bo)
434 return;
435
436 /* Don't return to cache if there are still references */
437 if (p_atomic_dec_return(&bo->refcnt))
438 return;
439
440 struct panfrost_device *dev = bo->dev;
441
442 pthread_mutex_lock(&dev->bo_map_lock);
443
444 /* Someone might have imported this BO while we were waiting for the
445 * lock, let's make sure it's still not referenced before freeing it.
446 */
447 if (p_atomic_read(&bo->refcnt) == 0) {
448 /* When the reference count goes to zero, we need to cleanup */
449 panfrost_bo_munmap(bo);
450
451 if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
452 pandecode_inject_free(bo->ptr.gpu, bo->size);
453
454 /* Rather than freeing the BO now, we'll cache the BO for later
455 * allocations if we're allowed to.
456 */
457 if (!panfrost_bo_cache_put(bo))
458 panfrost_bo_free(bo);
459
460 }
461 pthread_mutex_unlock(&dev->bo_map_lock);
462 }
463
464 struct panfrost_bo *
panfrost_bo_import(struct panfrost_device * dev,int fd)465 panfrost_bo_import(struct panfrost_device *dev, int fd)
466 {
467 struct panfrost_bo *bo;
468 struct drm_panfrost_get_bo_offset get_bo_offset = {0,};
469 ASSERTED int ret;
470 unsigned gem_handle;
471
472 ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle);
473 assert(!ret);
474
475 pthread_mutex_lock(&dev->bo_map_lock);
476 bo = pan_lookup_bo(dev, gem_handle);
477
478 if (!bo->dev) {
479 get_bo_offset.handle = gem_handle;
480 ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset);
481 assert(!ret);
482
483 bo->dev = dev;
484 bo->ptr.gpu = (mali_ptr) get_bo_offset.offset;
485 bo->size = lseek(fd, 0, SEEK_END);
486 /* Sometimes this can fail and return -1. size of -1 is not
487 * a nice thing for mmap to try mmap. Be more robust also
488 * for zero sized maps and fail nicely too
489 */
490 if ((bo->size == 0) || (bo->size == (size_t)-1)) {
491 pthread_mutex_unlock(&dev->bo_map_lock);
492 return NULL;
493 }
494 bo->flags = PAN_BO_SHARED;
495 bo->gem_handle = gem_handle;
496 p_atomic_set(&bo->refcnt, 1);
497 } else {
498 /* bo->refcnt == 0 can happen if the BO
499 * was being released but panfrost_bo_import() acquired the
500 * lock before panfrost_bo_unreference(). In that case, refcnt
501 * is 0 and we can't use panfrost_bo_reference() directly, we
502 * have to re-initialize the refcnt().
503 * Note that panfrost_bo_unreference() checks
504 * refcnt value just after acquiring the lock to
505 * make sure the object is not freed if panfrost_bo_import()
506 * acquired it in the meantime.
507 */
508 if (p_atomic_read(&bo->refcnt) == 0)
509 p_atomic_set(&bo->refcnt, 1);
510 else
511 panfrost_bo_reference(bo);
512 }
513 pthread_mutex_unlock(&dev->bo_map_lock);
514
515 return bo;
516 }
517
518 int
panfrost_bo_export(struct panfrost_bo * bo)519 panfrost_bo_export(struct panfrost_bo *bo)
520 {
521 struct drm_prime_handle args = {
522 .handle = bo->gem_handle,
523 .flags = DRM_CLOEXEC,
524 };
525
526 int ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
527 if (ret == -1)
528 return -1;
529
530 bo->flags |= PAN_BO_SHARED;
531 return args.fd;
532 }
533
534