/*
 * Copyright (C) 2012-2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#ifndef FREEDRENO_PRIV_H_
#define FREEDRENO_PRIV_H_

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <xf86drm.h>

#include "util/hash_table.h"
#include "util/list.h"
#include "util/log.h"
#include "util/perf/cpu_trace.h"
#include "util/simple_mtx.h"
#include "util/slab.h"
#include "util/u_atomic.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/vma.h"

#include "freedreno_common.h"
#include "freedreno_dev_info.h"
#include "freedreno_drmif.h"
#include "freedreno_ringbuffer.h"

extern simple_mtx_t table_lock;
extern simple_mtx_t fence_lock;

#define SUBALLOC_SIZE (32 * 1024)
/* Maximum known alignment requirement is a6xx's TEX_CONST at 16 dwords */
#define SUBALLOC_ALIGNMENT 64
#define RING_FLAGS (FD_BO_GPUREADONLY | FD_BO_CACHED_COHERENT)

/*
 * Stupid/simple growable array implementation:
 */

#define MAX_ARRAY_SIZE ((unsigned short)~0)

static inline void
grow(void **ptr, uint16_t nr, uint16_t *max, uint16_t sz)
{
   assert((nr + 1) < MAX_ARRAY_SIZE);
   if ((nr + 1) > *max) {
      if (*max > MAX_ARRAY_SIZE/2)
         *max = MAX_ARRAY_SIZE;
      else if ((*max * 2) < (nr + 1))
         *max = nr + 5;
      else
         *max = *max * 2;
      *ptr = realloc(*ptr, *max * sz);
   }
}

#define DECLARE_ARRAY(type, name)                                              \
   unsigned short nr_##name, max_##name;                                       \
   type *name;

#define APPEND(x, name, ...)                                                   \
   ({                                                                          \
      grow((void **)&(x)->name, (x)->nr_##name, &(x)->max_##name,              \
           sizeof((x)->name[0]));                                              \
      (x)->name[(x)->nr_##name] = __VA_ARGS__;                                 \
      (x)->nr_##name++;                                                        \
   })
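
/*
 * Usage sketch for DECLARE_ARRAY/APPEND (the struct and field names here are
 * purely illustrative, not part of the API).  APPEND grows the backing
 * storage as needed and, being a statement expression, evaluates to the
 * index at which the new element was stored:
 *
 *    struct bo_list {
 *       DECLARE_ARRAY(struct fd_bo *, bos);
 *    };
 *
 *    // appends bo and yields its index in list->bos:
 *    //    unsigned idx = APPEND(list, bos, bo);
 */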

#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))


struct fd_device_funcs {
   /* Create a new buffer object:
    */
   struct fd_bo *(*bo_new)(struct fd_device *dev, uint32_t size, uint32_t flags);

   /* Create a new buffer object from existing handle (ie. dma-buf or
    * flink import):
    */
   struct fd_bo *(*bo_from_handle)(struct fd_device *dev, uint32_t size,
                                   uint32_t handle);
   uint32_t (*handle_from_dmabuf)(struct fd_device *dev, int fd);
   struct fd_bo *(*bo_from_dmabuf)(struct fd_device *dev, int fd);
   void (*bo_close_handle)(struct fd_bo *bo);

   struct fd_pipe *(*pipe_new)(struct fd_device *dev, enum fd_pipe_id id,
                               unsigned prio);
   int (*flush)(struct fd_device *dev);
   void (*destroy)(struct fd_device *dev);
};

struct fd_bo_bucket {
   uint32_t size;
   int count, hits, misses, expired;
   struct list_head list;
};

struct fd_bo_cache {
   const char *name;
   simple_mtx_t lock;
   struct fd_bo_bucket cache_bucket[14 * 4];
   int num_buckets;
   time_t time;
};

/* Probably good for the block size to be a multiple of an available
 * large-page size.  For overlap of what both the MMU (with 4kb granule)
 * and SMMU support, 2MB is that overlap.  (Well, 4kb is as well, but
 * too small to be practical ;-))
 */
#define FD_BO_HEAP_BLOCK_SIZE (4 * 1024 * 1024)

/* Zero is an invalid handle, use it to indicate buffers that have been sub-
 * allocated from a larger backing heap block buffer.
 */
#define FD_BO_SUBALLOC_HANDLE 0

static inline bool
suballoc_bo(struct fd_bo *bo)
{
   return bo->handle == FD_BO_SUBALLOC_HANDLE;
}

/**
 * A heap is a virtual range of memory that is backed by N physical buffers,
 * from which buffers can be suballocated.  This requires kernel support for
 * userspace allocated iova.
 */
struct fd_bo_heap {
   struct fd_device *dev;

   int cnt;

   /**
    * Buffer allocation flags for buffers allocated from this heap.
    */
   uint32_t flags;

   simple_mtx_t lock;

   /**
    * Ranges of the backing buffer are allocated at a granularity of
    * SUBALLOC_ALIGNMENT
    */
   struct util_vma_heap heap;

   /**
    * List of recently freed suballocated BOs from this allocator until they
    * become idle.  Backend should periodically call fd_bo_suballoc_clean()
    * to check for newly idle entries on the freelist, so that the memory can
    * be returned to the free heap.
    */
   struct list_head freelist;

   /**
    * The backing buffers.  Maximum total heap size is:
    *   FD_BO_HEAP_BLOCK_SIZE * ARRAY_SIZE(heap->blocks)
    */
   struct fd_bo *blocks[256];
};

struct fd_bo_heap *fd_bo_heap_new(struct fd_device *dev, uint32_t flags);
void fd_bo_heap_destroy(struct fd_bo_heap *heap);

struct fd_bo *fd_bo_heap_block(struct fd_bo *bo);
struct fd_bo *fd_bo_heap_alloc(struct fd_bo_heap *heap, uint32_t size);
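
/*
 * Usage sketch (illustrative): a backend with userspace-iova support creates
 * its heaps at device init and then services small mappable allocations from
 * them.  The flags shown are assumptions for the sketch; the authoritative
 * values are whatever the backend actually passes:
 *
 *    dev->ring_heap = fd_bo_heap_new(dev, RING_FLAGS);
 *    dev->default_heap = fd_bo_heap_new(dev, 0);
 *    ...
 *    struct fd_bo *bo = fd_bo_heap_alloc(dev->default_heap, 4096);
 *    // bo->handle == FD_BO_SUBALLOC_HANDLE, so suballoc_bo(bo) is true and
 *    // the BO lives inside one of heap->blocks[]; see submit_offset() below.
 */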

static inline uint32_t
submit_offset(struct fd_bo *bo, uint32_t offset)
{
   if (suballoc_bo(bo)) {
      offset += bo->iova - fd_bo_heap_block(bo)->iova;
   }
   return offset;
}

struct fd_device {
   int fd;
   enum fd_version version;
   int32_t refcnt;

   /* tables to keep track of bo's, to avoid "evil-twin" fd_bo objects:
    *
    *   handle_table: maps handle to fd_bo
    *   name_table: maps flink name to fd_bo
    *
    * We end up needing two tables, because DRM_IOCTL_GEM_OPEN always
    * returns a new handle.  So we need to figure out if the bo is already
    * open in the process first, before calling gem-open.
    */
   struct hash_table *handle_table, *name_table;

   const struct fd_device_funcs *funcs;

   struct fd_bo_cache bo_cache;
   struct fd_bo_cache ring_cache;

   /**
    * Heap for mappable + cached-coherent + gpu-readonly (ie. cmdstream)
    */
   struct fd_bo_heap *ring_heap;

   /**
    * Heap for mappable (ie. majority of small buffer allocations, etc)
    */
   struct fd_bo_heap *default_heap;

   bool has_cached_coherent;

   bool closefd; /* call close(fd) upon destruction */

   /* just for valgrind: */
   int bo_size;

   /**
    * List of deferred submits, protected by submit_lock.  The deferred
    * submits are tracked globally per-device so that the order in which
    * they are passed off to the kernel is preserved, even if they end up
    * executing in a different order on the kernel side (ie. due to
    * different priority submitqueues, etc).  Once the kernel has them, it
    * is the fences' job to preserve correct order of execution.
    */
   struct list_head deferred_submits;
   struct fd_fence *deferred_submits_fence;
   unsigned deferred_cmds;
   simple_mtx_t submit_lock;

   /**
    * BO for suballocating long-lived state objects.
    *
    * Note: one would be tempted to put this in fd_pipe to avoid locking.
    * But that is a bad idea for a couple of reasons:
    *
    *  1) With TC, stateobj allocation can happen in either the frontend
    *     thread (ie. most CSOs) or the driver thread (a6xx cached tex state)
    *  2) It is best for fd_pipe to not hold a reference to a BO that can
    *     be free'd to bo cache, as that can cause unexpected re-entrancy
    *     (fd_bo_cache_alloc() -> find_in_bucket() -> fd_bo_state() ->
    *     cleanup_fences() -> drop pipe ref which free's bo's).
    */
   struct fd_bo *suballoc_bo;
   uint32_t suballoc_offset;
   simple_mtx_t suballoc_lock;

   struct util_queue submit_queue;
};
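
/*
 * Sketch of the "check the tables before gem-open" pattern described in the
 * struct fd_device comment above (names and control flow are illustrative;
 * the real import paths live in the .c code):
 *
 *    simple_mtx_lock(&table_lock);
 *    struct hash_entry *entry = _mesa_hash_table_search(dev->name_table, &name);
 *    if (entry) {
 *       bo = entry->data;        // already open in this process, just ref it
 *    } else {
 *       // DRM_IOCTL_GEM_OPEN, then insert the new fd_bo into both
 *       // handle_table and name_table so later imports find it
 *    }
 *    simple_mtx_unlock(&table_lock);
 */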

static inline bool
fd_device_threaded_submit(struct fd_device *dev)
{
   return util_queue_is_initialized(&dev->submit_queue);
}

#define foreach_submit(name, list) \
   list_for_each_entry(struct fd_submit, name, list, node)
#define foreach_submit_safe(name, list) \
   list_for_each_entry_safe(struct fd_submit, name, list, node)
#define last_submit(list) \
   list_last_entry(list, struct fd_submit, node)

#define foreach_bo(name, list) \
   list_for_each_entry(struct fd_bo, name, list, node)
#define foreach_bo_safe(name, list) \
   list_for_each_entry_safe(struct fd_bo, name, list, node)
#define first_bo(list) \
   list_first_entry(list, struct fd_bo, node)
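
/*
 * Usage sketch: these just wrap the util/list.h iterators with the right
 * element type; fd_dev_count_deferred_cmds() below is the same pattern
 * spelled out.  E.g. walking the deferred submit list:
 *
 *    simple_mtx_lock(&dev->submit_lock);
 *    foreach_submit (submit, &dev->deferred_submits) {
 *       // inspect submit->fence, submit->primary, etc.
 *    }
 *    simple_mtx_unlock(&dev->submit_lock);
 */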


void fd_bo_cache_init(struct fd_bo_cache *cache, int coarse, const char *name);
void fd_bo_cache_cleanup(struct fd_bo_cache *cache, time_t time);
struct fd_bo *fd_bo_cache_alloc(struct fd_bo_cache *cache, uint32_t *size,
                                uint32_t flags);
int fd_bo_cache_free(struct fd_bo_cache *cache, struct fd_bo *bo);
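
/*
 * Usage sketch of the BO cache (illustrative; the exact return-value
 * convention lives in the implementation).  Allocation first tries to
 * recycle a cached BO of a suitable bucket size, and the destroy path
 * offers the BO back to the cache before really freeing it:
 *
 *    uint32_t size = req_size;
 *    struct fd_bo *bo = fd_bo_cache_alloc(&dev->bo_cache, &size, flags);
 *    if (!bo)
 *       bo = dev->funcs->bo_new(dev, size, flags);   // cache miss
 *    ...
 *    if (fd_bo_cache_free(&dev->bo_cache, bo) != 0) {
 *       // cache declined the BO (assumed convention), really destroy it
 *    }
 */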

/* for where @fence_lock is already held: */
void fd_pipe_del_locked(struct fd_pipe *pipe);

struct fd_pipe_funcs {
   struct fd_ringbuffer *(*ringbuffer_new_object)(struct fd_pipe *pipe,
                                                  uint32_t size);
   struct fd_submit *(*submit_new)(struct fd_pipe *pipe);

   /**
    * Flush any deferred submits (if deferred submits are supported by
    * the pipe implementation)
    */
   void (*flush)(struct fd_pipe *pipe, uint32_t fence);

   int (*get_param)(struct fd_pipe *pipe, enum fd_param_id param,
                    uint64_t *value);
   int (*set_param)(struct fd_pipe *pipe, enum fd_param_id param,
                    uint64_t value);
   int (*wait)(struct fd_pipe *pipe, const struct fd_fence *fence,
               uint64_t timeout);
   void (*destroy)(struct fd_pipe *pipe);
};

struct fd_pipe_control {
   uint32_t fence;
};
#define control_ptr(pipe, member) \
   (pipe)->control_mem, offsetof(struct fd_pipe_control, member), 0, 0

struct fd_pipe {
   struct fd_device *dev;
   enum fd_pipe_id id;
   struct fd_dev_id dev_id;

   /**
    * Note refcnt is *not* atomic, but protected by fence_lock, since the
    * fence_lock is held in fd_bo_add_fence(), which is the hotpath.
    */
   int32_t refcnt;

   /**
    * The last fence seqno allocated for this pipe.  The fd_pipe represents
    * a single timeline; fences allocated by this pipe can be compared to
    * each other, but fences from different pipes are not comparable (as
    * there could be preemption of multiple priority level submitqueues at
    * play).
    */
   uint32_t last_fence;

   /**
    * The last fence seqno that was flushed to kernel (doesn't mean that it
    * is complete, just that the kernel knows about it)
    */
   uint32_t last_submit_fence;

   uint32_t last_enqueue_fence;   /* just for debugging */

   /**
    * Counter for assigning each submit a unique seqno.
    */
   seqno_t submit_seqno;

   /**
    * If we *ever* see an in-fence-fd, assume that userspace is
    * not relying on implicit fences.
    */
   bool no_implicit_sync;

   bool is_64bit;

   struct fd_bo *control_mem;
   volatile struct fd_pipe_control *control;

   struct slab_parent_pool ring_pool;

   const struct fd_pipe_funcs *funcs;
};
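
/*
 * Sketch of how the per-pipe fence seqnos relate (illustrative, ignoring
 * seqno wraparound; the helper name is hypothetical):
 *
 *    // seqnos allocated on this pipe:      1 .. last_fence
 *    // seqnos already flushed to kernel:   1 .. last_submit_fence
 *
 *    static inline bool
 *    fence_needs_flush(const struct fd_pipe *pipe, uint32_t fence)
 *    {
 *       return fence > pipe->last_submit_fence;
 *    }
 *
 *    // a caller holding such a fence would use fd_pipe_flush(pipe, fence)
 *    // to push the deferred submits covering it out to the kernel.
 */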

uint32_t fd_pipe_emit_fence(struct fd_pipe *pipe, struct fd_ringbuffer *ring);

static inline void
fd_pipe_flush(struct fd_pipe *pipe, uint32_t fence)
{
   if (!pipe->funcs->flush)
      return;
   pipe->funcs->flush(pipe, fence);
}

struct fd_submit_funcs {
   struct fd_ringbuffer *(*new_ringbuffer)(struct fd_submit *submit,
                                           uint32_t size,
                                           enum fd_ringbuffer_flags flags);
   struct fd_fence *(*flush)(struct fd_submit *submit, int in_fence_fd,
                             bool use_fence_fd);
   void (*destroy)(struct fd_submit *submit);
};

struct fd_submit {
   int32_t refcnt;
   struct fd_pipe *pipe;
   struct fd_device *dev;
   const struct fd_submit_funcs *funcs;

   struct fd_ringbuffer *primary;
   uint32_t fence;
   struct list_head node;  /* node in fd_device::deferred_submits */
};

static inline unsigned
fd_dev_count_deferred_cmds(struct fd_device *dev)
{
   unsigned nr = 0;

   simple_mtx_assert_locked(&dev->submit_lock);

   list_for_each_entry (struct fd_submit, submit, &dev->deferred_submits, node) {
      nr += fd_ringbuffer_cmd_count(submit->primary);
   }

   return nr;
}

struct fd_bo_funcs {
   int (*offset)(struct fd_bo *bo, uint64_t *offset);
   void *(*map)(struct fd_bo *bo);
   int (*cpu_prep)(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op);
   int (*madvise)(struct fd_bo *bo, int willneed);
   uint64_t (*iova)(struct fd_bo *bo);
   void (*set_name)(struct fd_bo *bo, const char *fmt, va_list ap);
   int (*dmabuf)(struct fd_bo *bo);

   /**
    * Optional hook that is called before ->destroy().  In the case of
    * batch deletes (such as BO cache cleanup or cleaning up a submit)
    * the ->finalize() hook will be called for all of the BOs being
    * destroyed followed by dev->flush() and then bo->destroy().  This
    * allows the backend to batch up processing.  (Ie. this is for
    * virtio backend to batch ccmds to the host)
    *
    * In all cases, dev->flush() will happen after bo->finalize() and
    * before bo->destroy().
    */
   void (*finalize)(struct fd_bo *bo);
   void (*destroy)(struct fd_bo *bo);

   /**
    * Optional, copy data into bo, falls back to mmap+memcpy.  If not
    * implemented, it must be possible to mmap all buffers
    */
   void (*upload)(struct fd_bo *bo, void *src, unsigned off, unsigned len);

   /**
    * Optional, if upload is supported, should upload be preferred?
    */
   bool (*prefer_upload)(struct fd_bo *bo, unsigned len);

   void (*set_metadata)(struct fd_bo *bo, void *metadata, uint32_t metadata_size);
   int (*get_metadata)(struct fd_bo *bo, void *metadata, uint32_t metadata_size);
};
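
/*
 * Sketch of the batch-delete ordering described for ->finalize() above
 * (illustrative, not the literal implementation; "unref_list" is a
 * hypothetical scratch list): finalize everything, flush once, then destroy:
 *
 *    foreach_bo_safe (bo, &unref_list) {
 *       if (bo->funcs->finalize)
 *          bo->funcs->finalize(bo);
 *    }
 *    dev->funcs->flush(dev);
 *    foreach_bo_safe (bo, &unref_list) {
 *       list_del(&bo->node);
 *       bo->funcs->destroy(bo);
 *    }
 */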

void fd_bo_add_fence(struct fd_bo *bo, struct fd_fence *fence);
void *fd_bo_map_os_mmap(struct fd_bo *bo);

enum fd_bo_state {
   FD_BO_STATE_IDLE,
   FD_BO_STATE_BUSY,
   FD_BO_STATE_UNKNOWN,
};
enum fd_bo_state fd_bo_state(struct fd_bo *bo);

void fd_bo_init_common(struct fd_bo *bo, struct fd_device *dev);
void fd_bo_fini_fences(struct fd_bo *bo);
void fd_bo_fini_common(struct fd_bo *bo);

struct fd_bo *fd_bo_new_ring(struct fd_device *dev, uint32_t size);

uint32_t fd_handle_from_dmabuf_drm(struct fd_device *dev, int fd);
struct fd_bo *fd_bo_from_dmabuf_drm(struct fd_device *dev, int fd);
void fd_bo_close_handle_drm(struct fd_bo *bo);

#define enable_debug 0 /* TODO make dynamic */

bool fd_dbg(void);

#define INFO_MSG(fmt, ...)                                                     \
   do {                                                                        \
      if (fd_dbg())                                                            \
         mesa_logi("%s:%d: " fmt, __func__, __LINE__, ##__VA_ARGS__);          \
   } while (0)
#define DEBUG_MSG(fmt, ...)                                                    \
   do                                                                          \
      if (enable_debug) {                                                      \
         mesa_logd("%s:%d: " fmt, __func__, __LINE__, ##__VA_ARGS__);          \
      }                                                                        \
   while (0)
#define WARN_MSG(fmt, ...)                                                     \
   do {                                                                        \
      mesa_logw("%s:%d: " fmt, __func__, __LINE__, ##__VA_ARGS__);             \
   } while (0)
#define ERROR_MSG(fmt, ...)                                                    \
   do {                                                                        \
      mesa_loge("%s:%d: " fmt, __func__, __LINE__, ##__VA_ARGS__);             \
   } while (0)
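
/*
 * Usage sketch: the macros prepend "function:line", so callers just pass a
 * printf-style format (errno/strerror shown purely as an example):
 *
 *    DEBUG_MSG("bo handle=%u size=%u", bo->handle, bo->size);
 *    ERROR_MSG("mmap failed: %s", strerror(errno));
 */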

#define U642VOID(x) ((void *)(unsigned long)(x))
#define VOID2U64(x) ((uint64_t)(unsigned long)(x))

#ifdef HAVE_VALGRIND
#include <memcheck.h>

/*
 * For tracking the backing memory (if valgrind enabled, we force a mmap
 * for the purposes of tracking)
 */
static inline void
VG_BO_ALLOC(struct fd_bo *bo)
{
   if (bo && RUNNING_ON_VALGRIND) {
      VALGRIND_MALLOCLIKE_BLOCK(fd_bo_map(bo), bo->size, 0, 1);
   }
}

static inline void
VG_BO_FREE(struct fd_bo *bo)
{
   VALGRIND_FREELIKE_BLOCK(bo->map, 0);
}

/*
 * For tracking bo structs that are in the buffer-cache, so that valgrind
 * doesn't attribute ownership to the first one to allocate the recycled
 * bo.
 *
 * Note that the list_head in fd_bo is used to track the buffers in cache,
 * so error reporting is disabled on that range while they are in the cache
 * to keep valgrind from squawking about the list traversal.
 */
static inline void
VG_BO_RELEASE(struct fd_bo *bo)
{
   if (RUNNING_ON_VALGRIND) {
      VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(bo, bo->dev->bo_size);
      VALGRIND_MAKE_MEM_NOACCESS(bo, bo->dev->bo_size);
      VALGRIND_FREELIKE_BLOCK(bo->map, 0);
   }
}
static inline void
VG_BO_OBTAIN(struct fd_bo *bo)
{
   if (RUNNING_ON_VALGRIND) {
      VALGRIND_MAKE_MEM_DEFINED(bo, bo->dev->bo_size);
      VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(bo, bo->dev->bo_size);
      VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, 1);
   }
}
/* special case for fd_bo_upload */
static inline void
VG_BO_MAPPED(struct fd_bo *bo)
{
   VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, 1);
}
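
/*
 * Pairing sketch: VG_BO_ALLOC/VG_BO_FREE bracket the real lifetime of the
 * backing memory, while VG_BO_RELEASE/VG_BO_OBTAIN bracket a round-trip
 * through the bo cache:
 *
 *    VG_BO_ALLOC(bo);      // freshly allocated (or imported)
 *    ...
 *    VG_BO_RELEASE(bo);    // parked in the bo cache
 *    VG_BO_OBTAIN(bo);     // recycled back out of the cache
 *    ...
 *    VG_BO_FREE(bo);       // really destroyed
 */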
#else
static inline void
VG_BO_ALLOC(struct fd_bo *bo)
{
}
static inline void
VG_BO_FREE(struct fd_bo *bo)
{
}
static inline void
VG_BO_RELEASE(struct fd_bo *bo)
{
}
static inline void
VG_BO_OBTAIN(struct fd_bo *bo)
{
}
static inline void
VG_BO_MAPPED(struct fd_bo *bo)
{
}
#endif

#define FD_DEFINE_CAST(parent, child)                                          \
   static inline struct child *to_##child(struct parent *x)                    \
   {                                                                           \
      return (struct child *)x;                                                \
   }
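
/*
 * Usage sketch: a backend embeds the generic struct as its first member and
 * uses FD_DEFINE_CAST for the downcast (backend names here are illustrative):
 *
 *    struct msm_device {
 *       struct fd_device base;
 *       ...
 *    };
 *    FD_DEFINE_CAST(fd_device, msm_device);
 *
 *    // later:
 *    //    struct msm_device *msm_dev = to_msm_device(dev);
 */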

#endif /* FREEDRENO_PRIV_H_ */