1 /*
2  * Copyright © 2012-2018 Rob Clark <robclark@freedesktop.org>
3  * SPDX-License-Identifier: MIT
4  *
5  * Authors:
6  *    Rob Clark <robclark@freedesktop.org>
7  */
8 
9 #ifndef FREEDRENO_PRIV_H_
10 #define FREEDRENO_PRIV_H_
11 
#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
20 
21 #include <xf86drm.h>
22 
23 #include "util/hash_table.h"
24 #include "util/list.h"
25 #include "util/log.h"
26 #include "util/perf/cpu_trace.h"
27 #include "util/simple_mtx.h"
28 #include "util/slab.h"
29 #include "util/u_atomic.h"
30 #include "util/u_debug.h"
31 #include "util/u_math.h"
32 #include "util/vma.h"
33 
34 #include "freedreno_common.h"
35 #include "freedreno_dev_info.h"
36 #include "freedreno_drmif.h"
37 #include "freedreno_rd_output.h"
38 #include "freedreno_ringbuffer.h"
39 
/* Locks defined elsewhere and shared by the users of this header.
 * fence_lock is the lock that protects fd_pipe::refcnt (see the note on
 * that field) and that must already be held when calling
 * fd_pipe_del_locked().
 * NOTE(review): table_lock presumably guards the fd_device handle/name
 * tables -- confirm against the .c files.
 */
extern simple_mtx_t table_lock;
extern simple_mtx_t fence_lock;
/* NOTE(review): presumably the OS page size in bytes -- confirm where it is
 * initialized.
 */
extern uint64_t os_page_size;

/* NOTE(review): presumably the size (32 KiB) of the BO that small objects
 * are suballocated from, see fd_device::suballoc_bo -- confirm.
 */
#define SUBALLOC_SIZE (32 * 1024)
/* Maximum known alignment requirement is a6xx's TEX_CONST at 16 dwords */
#define SUBALLOC_ALIGNMENT 64
/* Combination of BO flags; going by the name, the set used for ringbuffer
 * (cmdstream) BOs -- confirm at the call sites.
 */
#define RING_FLAGS (FD_BO_GPUREADONLY | FD_BO_CACHED_COHERENT | FD_BO_HINT_COMMAND)
48 
/*
 * Stupid/simple growable array implementation:
 */

/* Largest capacity (in elements) an array can reach: 0xffff. */
#define MAX_ARRAY_SIZE ((unsigned short)~0)

/**
 * Make sure the array at *ptr has room for at least (nr + 1) elements.
 *
 * @ptr: in/out, the array storage (NULL is fine while the array is empty)
 * @nr:  number of elements currently in use
 * @max: in/out, current capacity in elements
 * @sz:  size of a single element in bytes
 *
 * Nothing happens while (nr + 1) still fits into *max.  Otherwise the
 * capacity is doubled, except that it is set to (nr + 5) when doubling
 * would not be enough, and pinned to MAX_ARRAY_SIZE once it has grown past
 * half of that limit.
 *
 * The realloc() result is stored without being checked: there is no way to
 * report failure to the caller, so on allocation failure *ptr ends up NULL.
 */
static inline void
grow(void **ptr, uint16_t nr, uint16_t *max, uint16_t sz)
{
   assert((nr + 1) < MAX_ARRAY_SIZE);
   if ((nr + 1) > *max) {
      if (*max > MAX_ARRAY_SIZE/2)
         *max = MAX_ARRAY_SIZE;
      else if ((*max * 2) < (nr + 1))
         *max = nr + 5;
      else
         *max = *max * 2;
      /* Both operands are uint16_t and would be promoted to (signed) int,
       * where a large capacity times a large element size can overflow.
       * Do the multiplication in size_t instead.
       */
      *ptr = realloc(*ptr, (size_t)*max * sz);
   }
}
69 
/* Declare the three members backing a growable array called `name`:
 * nr_<name> and max_<name> (the element count and capacity that APPEND()
 * hands to grow()) plus the element pointer <name> itself.
 */
#define DECLARE_ARRAY(type, name)                                              \
   unsigned short nr_##name, max_##name;                                       \
   type *name;

/* Append a value to the array `name` embedded in *x, calling grow() first
 * so there is room for it.  The variadic part is the value being stored,
 * which allows initializers that contain commas.
 *
 * This is a GNU statement expression; its value is that of the trailing
 * post-increment, ie. the index at which the new element was stored.  Note
 * that `x` is expanded several times, so it should not have side effects.
 */
#define APPEND(x, name, ...)                                                   \
   ({                                                                          \
      grow((void **)&(x)->name, (x)->nr_##name, &(x)->max_##name,              \
           sizeof((x)->name[0]));                                              \
      (x)->name[(x)->nr_##name] = __VA_ARGS__;                                 \
      (x)->nr_##name++;                                                        \
   })
81 
/* Read x through a volatile-qualified lvalue of its own type, so that the
 * access is performed as written rather than being cached or elided.
 */
#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
83 
84 
/* Table of backend entry points for a device; fd_device::funcs points at
 * one of these.
 */
struct fd_device_funcs {
   /* Create a new buffer object:
    */
   struct fd_bo *(*bo_new)(struct fd_device *dev, uint32_t size, uint32_t flags);

   /* Create a new buffer object from existing handle (ie. dma-buf or
    * flink import):
    */
   struct fd_bo *(*bo_from_handle)(struct fd_device *dev, uint32_t size,
                                   uint32_t handle);
   /* dma-buf import hooks, both taking the dma-buf fd.
    * NOTE(review): presumably handle_from_dmabuf returns the handle for the
    * dma-buf and bo_from_dmabuf returns an fd_bo wrapping it -- confirm in
    * the backends.
    */
   uint32_t (*handle_from_dmabuf)(struct fd_device *dev, int fd);
   struct fd_bo *(*bo_from_dmabuf)(struct fd_device *dev, int fd);
   /* NOTE(review): presumably releases the handle backing @bo -- confirm. */
   void (*bo_close_handle)(struct fd_bo *bo);

   /* Create a pipe on @dev for the given pipe id. */
   struct fd_pipe *(*pipe_new)(struct fd_device *dev, enum fd_pipe_id id,
                               unsigned prio);
   /* Device level flush; this is the dev->flush() referred to in the
    * description of fd_bo_funcs::finalize.
    */
   int (*flush)(struct fd_device *dev);
   /* Tear down the device. */
   void (*destroy)(struct fd_device *dev);
};
104 
/* One bucket of a BO cache, see fd_bo_cache::cache_bucket. */
struct fd_bo_bucket {
   /* NOTE(review): presumably the BO size served by this bucket -- confirm. */
   uint32_t size;
   /* NOTE(review): going by the names these are occupancy/statistics
    * counters for the bucket -- confirm how they are maintained.
    */
   int count, hits, misses, expired;
   /* NOTE(review): presumably the list of BOs currently held in the bucket. */
   struct list_head list;
};
110 
/* A BO cache: a lock plus a fixed array of buckets.  fd_device embeds two
 * of these (bo_cache and ring_cache).
 */
struct fd_bo_cache {
   const char *name;
   simple_mtx_t lock;
   struct fd_bo_bucket cache_bucket[14 * 4];
   /* NOTE(review): presumably the number of cache_bucket[] entries in use. */
   int num_buckets;
   /* NOTE(review): presumably a timestamp used when expiring cached BOs --
    * confirm against fd_bo_cache_cleanup().
    */
   time_t time;
};
118 
/* Probably good for the block size to be a multiple of an available
 * large-page size.  The large-page size supported by both the MMU (with
 * 4kb granule) and the SMMU is 2MB, and the 4MB block size used here is a
 * multiple of that.  (4kb is supported by both as well, but is too small
 * to be practical ;-))
 */
#define FD_BO_HEAP_BLOCK_SIZE (4 * 1024 * 1024)
125 
126 /* Zero is an invalid handle, use it to indicate buffers that have been sub-
127  * allocated from a larger backing heap block buffer.
128  */
129 #define FD_BO_SUBALLOC_HANDLE 0
130 
131 static inline bool
suballoc_bo(struct fd_bo * bo)132 suballoc_bo(struct fd_bo *bo)
133 {
134    return bo->handle == FD_BO_SUBALLOC_HANDLE;
135 }
136 
/**
 * A heap is a virtual range of memory that is backed by N physical buffers,
 * from which buffers can be suballocated.  This requires kernel support for
 * userspace allocated iova.
 */
struct fd_bo_heap {
   /* The device the heap was created on (see fd_bo_heap_new()). */
   struct fd_device *dev;

   /* NOTE(review): meaning not visible in this header -- presumably a count
    * related to outstanding suballocations; confirm in the heap code.
    */
   int cnt;

   /**
    * Buffer allocation flags for buffers allocated from this heap.
    */
   uint32_t flags;

   simple_mtx_t lock;

   /**
    * Ranges of the backing buffer are allocated at a granularity of
    * SUBALLOC_ALIGNMENT
    */
   struct util_vma_heap heap;

   /**
    * List of recently freed suballocated BOs from this allocator until they
    * become idle.  Backend should periodically call fd_bo_suballoc_clean()
    * to check for newly idle entries on the freelist, so that the memory can
    * be returned to the free heap.
    */
   struct list_head freelist;

   /**
    * The backing buffers.  Maximum total heap size is:
    *   FD_BO_HEAP_BLOCK_SIZE * ARRAY_SIZE(heap->blocks)
    */
   struct fd_bo *blocks[256];
};
174 
/* Heap creation and destruction.
 * NOTE(review): @flags presumably ends up in fd_bo_heap::flags -- confirm.
 */
struct fd_bo_heap *fd_bo_heap_new(struct fd_device *dev, uint32_t flags);
void fd_bo_heap_destroy(struct fd_bo_heap *heap);

/* fd_bo_heap_block() is what submit_offset() uses to find the BO whose iova
 * a suballocated BO's offset is rebased against, ie. its backing block.
 * NOTE(review): fd_bo_heap_alloc() presumably suballocates @size bytes from
 * @heap -- confirm the meaning of @flags and the failure return.
 */
struct fd_bo *fd_bo_heap_block(struct fd_bo *bo);
struct fd_bo *fd_bo_heap_alloc(struct fd_bo_heap *heap, uint32_t size, uint32_t flags);
180 
181 static inline uint32_t
submit_offset(struct fd_bo * bo,uint32_t offset)182 submit_offset(struct fd_bo *bo, uint32_t offset)
183 {
184    if (suballoc_bo(bo)) {
185       offset += bo->iova - fd_bo_heap_block(bo)->iova;
186    }
187    return offset;
188 }
189 
/* Per-device state.  Backend specific operations are reached through
 * `funcs`.
 */
struct fd_device {
   /* File descriptor of the device; closed on destruction if closefd is set. */
   int fd;
   enum fd_version version;
   int32_t refcnt;

   /* tables to keep track of bo's, to avoid "evil-twin" fd_bo objects:
    *
    *   handle_table: maps handle to fd_bo
    *   name_table: maps flink name to fd_bo
    *
    * We end up needing two tables, because DRM_IOCTL_GEM_OPEN always
    * returns a new handle.  So we need to figure out if the bo is already
    * open in the process first, before calling gem-open.
    */
   struct hash_table *handle_table, *name_table;

   const struct fd_device_funcs *funcs;

   /* NOTE(review): presumably bo_cache serves general allocations and
    * ring_cache serves ringbuffer BOs -- confirm at the call sites.
    */
   struct fd_bo_cache bo_cache;
   struct fd_bo_cache ring_cache;

   /**
    * Heap for mappable + cached-coherent + gpu-readonly (ie. cmdstream)
    */
   struct fd_bo_heap *ring_heap;

   /**
    * Heap for mappable (ie. majority of small buffer allocations, etc)
    */
   struct fd_bo_heap *default_heap;

   bool has_cached_coherent;

   bool closefd; /* call close(fd) upon destruction */

   /* just for valgrind: */
   /* (used by VG_BO_RELEASE()/VG_BO_OBTAIN() as the length of the fd_bo
    * struct range they mark)
    */
   int bo_size;

   /**
    * List of deferred submits, protected by submit_lock.  The deferred
    * submits are tracked globally per-device, even if they execute in
    * different order on the kernel side (ie. due to different priority
    * submitqueues, etc) to preserve the order that they are passed off
    * to the kernel.  Once the kernel has them, it is the fences' job
    * to preserve correct order of execution.
    */
   struct list_head deferred_submits;
   struct fd_fence *deferred_submits_fence;
   unsigned deferred_cmds;
   simple_mtx_t submit_lock;

   /**
    * BO for suballocating long-lived state objects.
    *
    * Note: one would be tempted to put this in fd_pipe to avoid locking.
    * But that is a bad idea for a couple of reasons:
    *
    *  1) With TC, stateobj allocation can happen in either frontend thread
    *     (ie. most CSOs), and also driver thread (a6xx cached tex state)
    *  2) It is best for fd_pipe to not hold a reference to a BO that can
    *     be free'd to bo cache, as that can cause unexpected re-entrancy
    *     (fd_bo_cache_alloc() -> find_in_bucket() -> fd_bo_state() ->
    *     cleanup_fences() -> drop pipe ref which free's bo's).
    */
   struct fd_bo *suballoc_bo;
   uint32_t suballoc_offset;
   simple_mtx_t suballoc_lock;

   /* Queue whose initialization state is what
    * fd_device_threaded_submit() reports.
    */
   struct util_queue submit_queue;

   struct fd_rd_output rd;
};
262 
263 static inline bool
fd_device_threaded_submit(struct fd_device * dev)264 fd_device_threaded_submit(struct fd_device *dev)
265 {
266    return util_queue_is_initialized(&dev->submit_queue);
267 }
268 
/* Helpers for walking a list of fd_submit linked through fd_submit::node:
 * plain iteration, the _safe iteration variant, and access to the last
 * entry.
 */
#define foreach_submit(name, list) \
   list_for_each_entry(struct fd_submit, name, list, node)
#define foreach_submit_safe(name, list) \
   list_for_each_entry_safe(struct fd_submit, name, list, node)
#define last_submit(list) \
   list_last_entry(list, struct fd_submit, node)

/* The same for a list of fd_bo linked through the `node` member of fd_bo,
 * with access to the first entry instead of the last.
 */
#define foreach_bo(name, list) \
   list_for_each_entry(struct fd_bo, name, list, node)
#define foreach_bo_safe(name, list) \
   list_for_each_entry_safe(struct fd_bo, name, list, node)
#define first_bo(list) \
   list_first_entry(list, struct fd_bo, node)
282 
283 
/* BO cache interface, operating on a struct fd_bo_cache.
 * NOTE(review): the meaning of @coarse, of the in/out @size of
 * fd_bo_cache_alloc() and of the return value of fd_bo_cache_free() is not
 * visible from this header -- confirm in the implementation.
 */
void fd_bo_cache_init(struct fd_bo_cache *cache, int coarse, const char *name);
void fd_bo_cache_cleanup(struct fd_bo_cache *cache, time_t time);
struct fd_bo *fd_bo_cache_alloc(struct fd_bo_cache *cache, uint32_t *size,
                                uint32_t flags);
int fd_bo_cache_free(struct fd_bo_cache *cache, struct fd_bo *bo);

/* for where @fence_lock is already held: */
void fd_pipe_del_locked(struct fd_pipe *pipe);
292 
/* Table of backend entry points for a pipe; fd_pipe::funcs points at one
 * of these.
 */
struct fd_pipe_funcs {
   struct fd_ringbuffer *(*ringbuffer_new_object)(struct fd_pipe *pipe,
                                                  uint32_t size);
   struct fd_submit *(*submit_new)(struct fd_pipe *pipe);

   /**
    * Flush any deferred submits (if deferred submits are supported by
    * the pipe implementation)
    *
    * May be left NULL, in which case fd_pipe_flush() does nothing.
    */
   void (*flush)(struct fd_pipe *pipe, uint32_t fence);
   void (*finish)(struct fd_pipe *pipe);

   /* Parameter query/update, keyed by fd_param_id. */
   int (*get_param)(struct fd_pipe *pipe, enum fd_param_id param,
                    uint64_t *value);
   int (*set_param)(struct fd_pipe *pipe, enum fd_param_id param,
                    uint64_t value);
   /* NOTE(review): presumably waits for @fence with the given timeout; the
    * unit of @timeout is not visible here -- confirm.
    */
   int (*wait)(struct fd_pipe *pipe, const struct fd_fence *fence,
               uint64_t timeout);
   void (*destroy)(struct fd_pipe *pipe);
};
313 
/* Layout of the memory that fd_pipe::control points at. */
struct fd_pipe_control {
   uint32_t fence;
};
/* Expands to four comma separated arguments: the pipe's control_mem BO, the
 * byte offset of `member` within fd_pipe_control, and two zeros.  It is
 * meant to be spliced into the argument list of a call.
 * NOTE(review): which calls take these, and what the two zeros stand for,
 * is not visible in this header -- confirm at the call sites.
 */
#define control_ptr(pipe, member) \
   (pipe)->control_mem, offsetof(struct fd_pipe_control, member), 0, 0
319 
/* Per-pipe state.  Backend specific operations are reached through
 * `funcs`.
 */
struct fd_pipe {
   struct fd_device *dev;
   enum fd_pipe_id id;
   struct fd_dev_id dev_id;

   /**
    * Note refcnt is *not* atomic, but protected by fence_lock, since the
    * fence_lock is held in fd_bo_add_fence(), which is the hotpath.
    */
   int32_t refcnt;

   /**
    * Previous fence seqno allocated for this pipe.  The fd_pipe represents
    * a single timeline, fences allocated by this pipe can be compared to
    * each other, but fences from different pipes are not comparable (as
    * there could be preemption of multiple priority level submitqueues at
    * play)
    */
   uint32_t last_fence;

   /**
    * The last fence seqno that was flushed to kernel (doesn't mean that it
    * is complete, just that the kernel knows about it)
    */
   uint32_t last_submit_fence;

   uint32_t last_enqueue_fence;   /* just for debugging */

   /**
    * Counter for assigning each submit a unique seqno.
    */
   seqno_t submit_seqno;

   /**
    * If we *ever* see an in-fence-fd, assume that userspace is
    * not relying on implicit fences.
    */
   bool no_implicit_sync;

   /* NOTE(review): presumably whether the GPU uses 64-bit addresses --
    * confirm.
    */
   bool is_64bit;

   /* control_mem is the BO that control_ptr() passes along together with an
    * fd_pipe_control member offset.
    * NOTE(review): `control` is presumably the CPU mapping of that BO --
    * confirm where it is set up.
    */
   struct fd_bo *control_mem;
   volatile struct fd_pipe_control *control;

   struct slab_parent_pool ring_pool;

   const struct fd_pipe_funcs *funcs;
};
368 
/* NOTE(review): going by the name and signature, emits a fence for @pipe
 * into @ring and returns its seqno -- confirm in the implementation.
 */
uint32_t fd_pipe_emit_fence(struct fd_pipe *pipe, struct fd_ringbuffer *ring);
370 
371 static inline void
fd_pipe_flush(struct fd_pipe * pipe,uint32_t fence)372 fd_pipe_flush(struct fd_pipe *pipe, uint32_t fence)
373 {
374    if (!pipe->funcs->flush)
375       return;
376    pipe->funcs->flush(pipe, fence);
377 }
378 
/* Table of backend entry points for a submit; fd_submit::funcs points at
 * one of these.
 */
struct fd_submit_funcs {
   /* Create a ringbuffer of @size belonging to @submit. */
   struct fd_ringbuffer *(*new_ringbuffer)(struct fd_submit *submit,
                                           uint32_t size,
                                           enum fd_ringbuffer_flags flags);
   /* NOTE(review): presumably flushes the submit, optionally waiting on
    * @in_fence_fd, and returns the resulting fence -- confirm the exact
    * meaning of @use_fence_fd in the backends.
    */
   struct fd_fence *(*flush)(struct fd_submit *submit, int in_fence_fd,
                             bool use_fence_fd);
   void (*destroy)(struct fd_submit *submit);
};
387 
/* Common part of a submit.  Backend specific operations are reached
 * through `funcs`.
 */
struct fd_submit {
   int32_t refcnt;
   struct fd_pipe *pipe;
   struct fd_device *dev;
   const struct fd_submit_funcs *funcs;

   /* The ringbuffer whose cmd count fd_dev_count_deferred_cmds() adds up. */
   struct fd_ringbuffer *primary;
   uint32_t fence;
   struct list_head node;  /* node in fd_device::deferred_submits */
};
398 
399 static inline unsigned
fd_dev_count_deferred_cmds(struct fd_device * dev)400 fd_dev_count_deferred_cmds(struct fd_device *dev)
401 {
402    unsigned nr = 0;
403 
404    simple_mtx_assert_locked(&dev->submit_lock);
405 
406    list_for_each_entry (struct fd_submit, submit, &dev->deferred_submits, node) {
407       nr += fd_ringbuffer_cmd_count(submit->primary);
408    }
409 
410    return nr;
411 }
412 
/* Table of backend entry points for a buffer object.
 * NOTE(review): the return conventions of the int-returning hooks are not
 * visible from this header -- confirm in the backends.
 */
struct fd_bo_funcs {
   int (*offset)(struct fd_bo *bo, uint64_t *offset);
   void *(*map)(struct fd_bo *bo);
   int (*cpu_prep)(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op);
   int (*madvise)(struct fd_bo *bo, int willneed);
   uint64_t (*iova)(struct fd_bo *bo);
   void (*set_name)(struct fd_bo *bo, const char *fmt, va_list ap);
   int (*dmabuf)(struct fd_bo *bo);

   /**
    * Optional hook that is called before ->destroy().  In the case of
    * batch deletes (such as BO cache cleanup or cleaning up a submit)
    * the ->finalize() hook will be called for all of the BOs being
    * destroyed followed by dev->flush() and then bo->destroy().  This
    * allows the backend to batch up processing.  (Ie. this is for
    * virtio backend to batch ccmds to the host)
    *
    * In all cases the order is the one given above: dev->flush() happens
    * after bo->finalize() and before bo->destroy().
    */
   void (*finalize)(struct fd_bo *bo);
   void (*destroy)(struct fd_bo *bo);

   /**
    * Optional, copy data into bo, falls back to mmap+memcpy.  If not
    * implemented, it must be possible to mmap all buffers
    */
   void (*upload)(struct fd_bo *bo, void *src, unsigned off, unsigned len);

   /**
    * Optional, if upload is supported, should upload be preferred?
    */
   bool (*prefer_upload)(struct fd_bo *bo, unsigned len);

   /* Store / fetch a metadata blob of @metadata_size bytes for @bo. */
   void (*set_metadata)(struct fd_bo *bo, void *metadata, uint32_t metadata_size);
   int (*get_metadata)(struct fd_bo *bo, void *metadata, uint32_t metadata_size);
};
450 
/* Internal BO helpers implemented elsewhere.  Per the note on
 * fd_pipe::refcnt, fence_lock is held in fd_bo_add_fence().
 */
void fd_bo_add_fence(struct fd_bo *bo, struct fd_fence *fence);
void *fd_bo_map_os_mmap(struct fd_bo *bo);
void *__fd_bo_map(struct fd_bo *bo);

/* Result of fd_bo_state().
 * NOTE(review): presumably describes whether the BO still has pending
 * fences, with UNKNOWN for when that cannot be determined -- confirm.
 */
enum fd_bo_state {
   FD_BO_STATE_IDLE,
   FD_BO_STATE_BUSY,
   FD_BO_STATE_UNKNOWN,
};
enum fd_bo_state fd_bo_state(struct fd_bo *bo);

/* Common init/teardown of the backend independent part of an fd_bo. */
void fd_bo_init_common(struct fd_bo *bo, struct fd_device *dev);
void fd_bo_fini_fences(struct fd_bo *bo);
void fd_bo_fini_common(struct fd_bo *bo);

struct fd_bo *fd_bo_new_ring(struct fd_device *dev, uint32_t size);

/* _drm variants with the signatures of the fd_device_funcs hooks
 * handle_from_dmabuf, bo_from_dmabuf and bo_close_handle.
 */
uint32_t fd_handle_from_dmabuf_drm(struct fd_device *dev, int fd);
struct fd_bo *fd_bo_from_dmabuf_drm(struct fd_device *dev, int fd);
void fd_bo_close_handle_drm(struct fd_bo *bo);
471 
/* Compile-time switch for DEBUG_MSG().  While it is 0 the debug log call
 * is still compiled (so its arguments keep being checked) but never runs.
 */
#define enable_debug 0 /* TODO make dynamic */

/* Runtime condition gating INFO_MSG(). */
bool fd_dbg(void);

/*
 * Logging helpers.  Each one prefixes the message with the calling
 * function's name and line number and takes a printf-style format plus
 * arguments.  INFO_MSG() only logs when fd_dbg() returns true, DEBUG_MSG()
 * only when enable_debug is non-zero, WARN_MSG() and ERROR_MSG() always log.
 *
 * All four wrap their body in do { ... } while (0) so that they behave as
 * a single statement.
 */
#define INFO_MSG(fmt, ...)                                                     \
   do {                                                                        \
      if (fd_dbg())                                                            \
         mesa_logi("%s:%d: " fmt, __func__, __LINE__, ##__VA_ARGS__);          \
   } while (0)
#define DEBUG_MSG(fmt, ...)                                                    \
   do {                                                                        \
      if (enable_debug) {                                                      \
         mesa_logd("%s:%d: " fmt, __func__, __LINE__, ##__VA_ARGS__);          \
      }                                                                        \
   } while (0)
#define WARN_MSG(fmt, ...)                                                     \
   do {                                                                        \
      mesa_logw("%s:%d: " fmt, __func__, __LINE__, ##__VA_ARGS__);             \
   } while (0)
#define ERROR_MSG(fmt, ...)                                                    \
   do {                                                                        \
      mesa_loge("%s:%d: " fmt, __func__, __LINE__, ##__VA_ARGS__);             \
   } while (0)
495 
/* Convert between a pointer and a uint64_t holding its value.  The
 * intermediate cast goes through uintptr_t, the integer type intended for
 * pointer round-trips, instead of unsigned long, whose width is not tied
 * to the pointer width on every ABI.
 */
#define U642VOID(x) ((void *)(uintptr_t)(x))
#define VOID2U64(x) ((uint64_t)(uintptr_t)(x))
498 
499 #ifdef HAVE_VALGRIND
500 #include <memcheck.h>
501 
502 /*
503  * For tracking the backing memory (if valgrind enabled, we force a mmap
504  * for the purposes of tracking)
505  */
506 static inline void
VG_BO_ALLOC(struct fd_bo * bo)507 VG_BO_ALLOC(struct fd_bo *bo)
508 {
509    if (bo && RUNNING_ON_VALGRIND) {
510       VALGRIND_MALLOCLIKE_BLOCK(fd_bo_map(bo), bo->size, 0, 1);
511    }
512 }
513 
/* Tell valgrind that the block at bo->map has been freed.  Unlike
 * VG_BO_ALLOC() this does not check RUNNING_ON_VALGRIND first.
 */
static inline void
VG_BO_FREE(struct fd_bo *bo)
{
   VALGRIND_FREELIKE_BLOCK(bo->map, 0);
}
519 
520 /*
521  * For tracking bo structs that are in the buffer-cache, so that valgrind
522  * doesn't attribute ownership to the first one to allocate the recycled
523  * bo.
524  *
525  * Note that the list_head in fd_bo is used to track the buffers in cache
526  * so disable error reporting on the range while they are in cache so
527  * valgrind doesn't squawk about list traversal.
528  *
529  */
530 static inline void
VG_BO_RELEASE(struct fd_bo * bo)531 VG_BO_RELEASE(struct fd_bo *bo)
532 {
533    if (RUNNING_ON_VALGRIND) {
534       VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(bo, bo->dev->bo_size);
535       VALGRIND_MAKE_MEM_NOACCESS(bo, bo->dev->bo_size);
536       VALGRIND_FREELIKE_BLOCK(bo->map, 0);
537    }
538 }
539 static inline void
VG_BO_OBTAIN(struct fd_bo * bo)540 VG_BO_OBTAIN(struct fd_bo *bo)
541 {
542    if (RUNNING_ON_VALGRIND) {
543       VALGRIND_MAKE_MEM_DEFINED(bo, bo->dev->bo_size);
544       VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(bo, bo->dev->bo_size);
545       VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, 1);
546    }
547 }
/* special case for fd_bo_upload */
/* Registers the existing bo->map as a malloc-like block of bo->size bytes,
 * without the RUNNING_ON_VALGRIND check and without going through
 * fd_bo_map() as VG_BO_ALLOC() does.
 */
static inline void
VG_BO_MAPPED(struct fd_bo *bo)
{
   VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, 1);
}
554 #else
/* Built without HAVE_VALGRIND: the VG_BO_*() hooks are empty stubs with
 * the same signatures, so callers need no #ifdefs of their own.
 */
static inline void
VG_BO_ALLOC(struct fd_bo *bo)
{
}
static inline void
VG_BO_FREE(struct fd_bo *bo)
{
}
static inline void
VG_BO_RELEASE(struct fd_bo *bo)
{
}
static inline void
VG_BO_OBTAIN(struct fd_bo *bo)
{
}
static inline void
VG_BO_MAPPED(struct fd_bo *bo)
{
}
575 #endif
576 
/* Define a helper to_<child>() that casts a `struct parent *` to a
 * `struct child *`.  The cast is a plain pointer cast with no checking.
 * NOTE(review): this presumably relies on struct child embedding struct
 * parent as its first member -- confirm at the places it is instantiated.
 */
#define FD_DEFINE_CAST(parent, child)                                          \
   static inline struct child *to_##child(struct parent *x)                    \
   {                                                                           \
      return (struct child *)x;                                                \
   }
582 
583 #endif /* FREEDRENO_PRIV_H_ */
584