/**************************************************************************
 *
 * Copyright 2018-2019 Alyssa Rosenzweig
 * Copyright 2018-2019 Collabora, Ltd.
 * Copyright © 2015 Intel Corporation
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#ifndef PAN_DEVICE_H
#define PAN_DEVICE_H

#include <xf86drm.h>
#include "renderonly/renderonly.h"
#include "util/u_dynarray.h"
#include "util/bitset.h"
#include "util/list.h"
#include "util/sparse_array.h"

#include <midgard_pack.h>

/* Driver limits */
#define PAN_MAX_CONST_BUFFERS 16

/* Transient slab size. This is a trade-off between fragmentation on one
 * hand and cache locality / ease of bookkeeping on the other */

#define TRANSIENT_SLAB_PAGES (16) /* 64kb */
#define TRANSIENT_SLAB_SIZE (4096 * TRANSIENT_SLAB_PAGES)

/* Maximum number of transient slabs so we don't need dynamic arrays. Most
 * interesting Mali boards have at most 4GB of RAM, so if the entire RAM
 * were filled with transient slabs, you could never exceed
 * (4GB / TRANSIENT_SLAB_SIZE) allocations anyway. By capping, we can use a
 * fixed-size bitset for tracking free slabs, eliminating quite a bit of
 * complexity. We can pack the free state of 8 slabs into a single byte, so
 * for 64kb transient slabs the bitset occupies a cheap 8kb of memory */

#define MAX_TRANSIENT_SLABS (1024*1024 / TRANSIENT_SLAB_PAGES)

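/* Illustrative sketch only (hypothetical type, not used by the driver):
 * with util/bitset.h already included above, a fixed-size free map sized
 * by MAX_TRANSIENT_SLABS is about as simple as it sounds. At 64kb per slab
 * that is 4GB / 64kb = 65536 slabs, i.e. 65536 bits = 8kb of bitset. */

struct pan_transient_slab_map_example {
        /* One bit per slab: set means the slab is free for reuse */
        BITSET_DECLARE(free_slabs, MAX_TRANSIENT_SLABS);
};
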
/* How many power-of-two levels in the BO cache do we want? 2^12
 * minimum chosen as it is the page size that all allocations are
 * rounded to */

#define MIN_BO_CACHE_BUCKET (12) /* 2^12 = 4KB */
#define MAX_BO_CACHE_BUCKET (22) /* 2^22 = 4MB */

/* Fencepost problem, hence the off-by-one */
#define NR_BO_CACHE_BUCKETS (MAX_BO_CACHE_BUCKET - MIN_BO_CACHE_BUCKET + 1)

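/* Illustrative helper (hypothetical name, mirroring what a BO cache lookup
 * might do, not the driver's actual routine): map an allocation size to its
 * power-of-two bucket index. Sizes above the largest bucket are reported as
 * uncacheable. */

static inline int
pan_bo_cache_bucket_index_example(size_t size)
{
        unsigned bucket = MIN_BO_CACHE_BUCKET;

        /* Find the smallest bucket that fits the allocation */
        while (bucket < MAX_BO_CACHE_BUCKET && ((size_t) 1 << bucket) < size)
                bucket++;

        /* Anything bigger than the largest bucket is not cached at all */
        if (size > ((size_t) 1 << bucket))
                return -1;

        return bucket - MIN_BO_CACHE_BUCKET;
}
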
/* Cache of blit shaders. Defined here so the compiled shaders can live on
 * (and be shared via) the device */

enum pan_blit_type {
        PAN_BLIT_FLOAT = 0,
        PAN_BLIT_UINT,
        PAN_BLIT_INT,
        PAN_BLIT_NUM_TYPES,
};

#define PAN_BLIT_NUM_TARGETS (12)

struct pan_blit_shader {
        mali_ptr shader;
        uint32_t blend_ret_addr;
};

struct pan_blit_shaders {
        struct panfrost_bo *bo;
        struct pan_blit_shader loads[PAN_BLIT_NUM_TARGETS][PAN_BLIT_NUM_TYPES][2];
};

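/* Illustrative lookup (not a stable API): a load shader for a given render
 * target and component type would be fetched roughly as
 *
 *    dev->blit_shaders.loads[rt][PAN_BLIT_FLOAT][msaa ? 1 : 0]
 *
 * where treating the final index as a multisampling toggle is an assumption
 * made for this example. */
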
typedef uint32_t mali_pixel_format;

struct panfrost_format {
        mali_pixel_format hw;
        unsigned bind;
};

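/* Illustrative use (not a stable API): with the per-device format table
 * declared below, the packed hardware format for a PIPE format can be
 * looked up as, e.g.,
 *
 *    dev->formats[PIPE_FORMAT_R8G8B8A8_UNORM].hw
 *
 * with the bind field carrying the supported binding flags. */
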
struct panfrost_device {
        /* For ralloc */
        void *memctx;

        int fd;

        /* Properties of the GPU in use */
        unsigned arch;
        unsigned gpu_id;
        unsigned core_count;
        unsigned thread_tls_alloc;
        unsigned quirks;

        /* Table of formats, indexed by a PIPE format */
        const struct panfrost_format *formats;

        /* Bitmask of supported compressed texture formats */
        uint32_t compressed_formats;

        /* Debug flags; see pan_util.h for how to interpret them */
        unsigned debug;

        drmVersionPtr kernel_version;

        struct renderonly *ro;

        pthread_mutex_t bo_map_lock;
        struct util_sparse_array bo_map;

        struct {
                pthread_mutex_t lock;

                /* List containing all cached BOs sorted in LRU (Least
                 * Recently Used) order. This allows us to quickly evict BOs
                 * that are more than 1 second old (see the illustrative
                 * sketch after this struct).
                 */
                struct list_head lru;

                /* The BO cache is a set of buckets with power-of-two sizes
                 * ranging from 2^MIN_BO_CACHE_BUCKET (4096, the page size)
                 * to 2^MAX_BO_CACHE_BUCKET (4MB). Each bucket is a linked
                 * list of free panfrost_bo objects. */

                struct list_head buckets[NR_BO_CACHE_BUCKETS];
        } bo_cache;

        struct pan_blit_shaders blit_shaders;

        /* Tiler heap shared across all tiler jobs, allocated against the
         * device since there's only a single tiler. Since this is invisible
         * to the CPU, it's okay for multiple contexts to reference it
         * simultaneously; by keeping it on the device struct, we eliminate a
         * costly per-context allocation. */

        struct panfrost_bo *tiler_heap;
};
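
/* Illustrative sketch of the BO cache eviction described above, kept as a
 * comment because struct panfrost_bo is defined elsewhere (pan_bo.h); the
 * member names (lru_node, last_used) and the free helper are hypothetical
 * names for this example:
 *
 *    time_t now = time(NULL);
 *
 *    list_for_each_entry_safe(struct panfrost_bo, bo,
 *                             &dev->bo_cache.lru, lru_node) {
 *            if (now - bo->last_used <= 1)
 *                    break;
 *
 *            list_del(&bo->lru_node);
 *            example_free_bo(bo);
 *    }
 */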

void
panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev);

void
panfrost_close_device(struct panfrost_device *dev);

bool
panfrost_supports_compressed_format(struct panfrost_device *dev, unsigned fmt);

static inline struct panfrost_bo *
pan_lookup_bo(struct panfrost_device *dev, uint32_t gem_handle)
{
        return util_sparse_array_get(&dev->bo_map, gem_handle);
}
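
/* Example usage (illustrative only; assumes a DRM fd for a Mali device has
 * already been opened and that a NULL ralloc parent is acceptable here):
 *
 *    struct panfrost_device dev = { 0 };
 *    panfrost_open_device(NULL, fd, &dev);
 *
 *    struct panfrost_bo *bo = pan_lookup_bo(&dev, gem_handle);
 *
 *    panfrost_close_device(&dev);
 */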

#endif