• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2019 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  */
26 
27 #include <xf86drm.h>
28 
29 #include "util/u_math.h"
30 #include "util/macros.h"
31 #include "util/hash_table.h"
32 #include "util/u_thread.h"
33 #include "drm-uapi/panfrost_drm.h"
34 #include "pan_encoder.h"
35 #include "pan_device.h"
36 #include "pan_bo.h"
37 #include "pan_texture.h"
38 #include "wrap.h"
39 #include "pan_util.h"
40 
41 /* Fixed "minimum revisions" */
42 #define NO_ANISO (~0)
43 #define HAS_ANISO (0)
44 
45 #define MODEL(gpu_id_, shortname, counters_, min_rev_anisotropic_, tib_size_, quirks_) \
46         { \
47                 .gpu_id = gpu_id_, \
48                 .name = "Mali-" shortname " (Panfrost)", \
49                 .performance_counters = counters_, \
50                 .min_rev_anisotropic = min_rev_anisotropic_, \
51                 .tilebuffer_size = tib_size_, \
52                 .quirks = quirks_, \
53         }
54 
55 /* Table of supported Mali GPUs */
56 const struct panfrost_model panfrost_model_list[] = {
57         MODEL(0x720, "T720", "T72x", NO_ANISO, 8192, { .no_hierarchical_tiling = true }),
58         MODEL(0x750, "T760", "T76x", NO_ANISO, 8192, {}),
59         MODEL(0x820, "T820", "T82x", NO_ANISO, 8192, { .no_hierarchical_tiling = true }),
60         MODEL(0x830, "T830", "T83x", NO_ANISO, 8192, { .no_hierarchical_tiling = true }),
61         MODEL(0x860, "T860", "T86x", NO_ANISO, 8192, {}),
62         MODEL(0x880, "T880", "T88x", NO_ANISO, 8192, {}),
63 
64         MODEL(0x6000, "G71", "TMIx", NO_ANISO, 8192, {}),
65         MODEL(0x6221, "G72", "THEx", 0x0030 /* r0p3 */, 16384, {}),
66         MODEL(0x7090, "G51", "TSIx", 0x1010 /* r1p1 */, 16384, {}),
67         MODEL(0x7093, "G31", "TDVx", HAS_ANISO, 16384, {}),
68         MODEL(0x7211, "G76", "TNOx", HAS_ANISO, 16384, {}),
69         MODEL(0x7212, "G52", "TGOx", HAS_ANISO, 16384, {}),
70         MODEL(0x7402, "G52 r1", "TGOx", HAS_ANISO, 16384, {}),
71         MODEL(0x9093, "G57", "TNAx", HAS_ANISO, 16384, {}),
72 };
73 
74 #undef NO_ANISO
75 #undef HAS_ANISO
76 #undef MODEL
77 
78 /*
79  * Look up a supported model by its GPU ID, or return NULL if the model is not
80  * supported at this time.
81  */
82 const struct panfrost_model *
panfrost_get_model(uint32_t gpu_id)83 panfrost_get_model(uint32_t gpu_id)
84 {
85         for (unsigned i = 0; i < ARRAY_SIZE(panfrost_model_list); ++i) {
86                 if (panfrost_model_list[i].gpu_id == gpu_id)
87                         return &panfrost_model_list[i];
88         }
89 
90         return NULL;
91 }
92 
93 /* Abstraction over the raw drm_panfrost_get_param ioctl for fetching
94  * information about devices */
95 
96 static __u64
panfrost_query_raw(int fd,enum drm_panfrost_param param,bool required,unsigned default_value)97 panfrost_query_raw(
98                 int fd,
99                 enum drm_panfrost_param param,
100                 bool required,
101                 unsigned default_value)
102 {
103         struct drm_panfrost_get_param get_param = {0,};
104         ASSERTED int ret;
105 
106         get_param.param = param;
107         ret = drmIoctl(fd, DRM_IOCTL_PANFROST_GET_PARAM, &get_param);
108 
109         if (ret) {
110                 assert(!required);
111                 return default_value;
112         }
113 
114         return get_param.value;
115 }
116 
117 static unsigned
panfrost_query_gpu_version(int fd)118 panfrost_query_gpu_version(int fd)
119 {
120         return panfrost_query_raw(fd, DRM_PANFROST_PARAM_GPU_PROD_ID, true, 0);
121 }
122 
123 static unsigned
panfrost_query_gpu_revision(int fd)124 panfrost_query_gpu_revision(int fd)
125 {
126         return panfrost_query_raw(fd, DRM_PANFROST_PARAM_GPU_REVISION, true, 0);
127 }
128 
129 unsigned
panfrost_query_l2_slices(const struct panfrost_device * dev)130 panfrost_query_l2_slices(const struct panfrost_device *dev)
131 {
132         /* Query MEM_FEATURES register */
133         uint32_t mem_features =
134                 panfrost_query_raw(dev->fd, DRM_PANFROST_PARAM_MEM_FEATURES,
135                                    true, 0);
136 
137         /* L2_SLICES is MEM_FEATURES[11:8] minus(1) */
138         return ((mem_features >> 8) & 0xF) + 1;
139 }
140 
141 static struct panfrost_tiler_features
panfrost_query_tiler_features(int fd)142 panfrost_query_tiler_features(int fd)
143 {
144         /* Default value (2^9 bytes and 8 levels) to match old behaviour */
145         uint32_t raw = panfrost_query_raw(fd, DRM_PANFROST_PARAM_TILER_FEATURES,
146                         false, 0x809);
147 
148         /* Bin size is log2 in the first byte, max levels in the second byte */
149         return (struct panfrost_tiler_features) {
150                 .bin_size = (1 << (raw & BITFIELD_MASK(5))),
151                 .max_levels = (raw >> 8) & BITFIELD_MASK(4)
152         };
153 }
154 
155 static unsigned
panfrost_query_core_count(int fd,unsigned * core_id_range)156 panfrost_query_core_count(int fd, unsigned *core_id_range)
157 {
158         /* On older kernels, worst-case to 16 cores */
159 
160         unsigned mask = panfrost_query_raw(fd,
161                         DRM_PANFROST_PARAM_SHADER_PRESENT, false, 0xffff);
162 
163         /* Some cores might be absent. In some cases, we care
164          * about the range of core IDs (that is, the greatest core ID + 1). If
165          * the core mask is contiguous, this equals the core count.
166          */
167         *core_id_range = util_last_bit(mask);
168 
169         /* The actual core count skips overs the gaps */
170         return util_bitcount(mask);
171 }
172 
/*
 * Architectural maximum thread counts, keyed by architecture major version.
 * Used as a fallback since the corresponding register may not be implemented
 * by a given chip. G31 is actually 512 instead of 768 but it doesn't really
 * matter.
 *
 * Any architecture value not listed explicitly (including values below 4)
 * takes the default branch and returns 1024.
 */
static unsigned
panfrost_max_thread_count(unsigned arch)
{
        switch (arch) {
        /* Midgard */
        case 4:
        case 5:
                return 256;

        /* Bifrost, first generation */
        case 6:
                return 384;

        /* Bifrost, second generation (G31 is 512 but it doesn't matter) */
        case 7:
                return 768;

        /* Valhall (for completeness) */
        default:
                return 1024;
        }
}
199 
200 static unsigned
panfrost_query_thread_tls_alloc(int fd,unsigned major)201 panfrost_query_thread_tls_alloc(int fd, unsigned major)
202 {
203         unsigned tls = panfrost_query_raw(fd,
204                         DRM_PANFROST_PARAM_THREAD_TLS_ALLOC, false, 0);
205 
206         return (tls > 0) ? tls : panfrost_max_thread_count(major);
207 }
208 
209 static uint32_t
panfrost_query_compressed_formats(int fd)210 panfrost_query_compressed_formats(int fd)
211 {
212         /* If unspecified, assume ASTC/ETC only. Factory default for Juno, and
213          * should exist on any Mali configuration. All hardware should report
214          * these texture formats but the kernel might not be new enough. */
215 
216         uint32_t default_set =
217                 (1 << MALI_ETC2_RGB8) |
218                 (1 << MALI_ETC2_R11_UNORM) |
219                 (1 << MALI_ETC2_RGBA8) |
220                 (1 << MALI_ETC2_RG11_UNORM) |
221                 (1 << MALI_ETC2_R11_SNORM) |
222                 (1 << MALI_ETC2_RG11_SNORM) |
223                 (1 << MALI_ETC2_RGB8A1) |
224                 (1 << MALI_ASTC_3D_LDR) |
225                 (1 << MALI_ASTC_3D_HDR) |
226                 (1 << MALI_ASTC_2D_LDR) |
227                 (1 << MALI_ASTC_2D_HDR);
228 
229         return panfrost_query_raw(fd, DRM_PANFROST_PARAM_TEXTURE_FEATURES0,
230                         false, default_set);
231 }
232 
233 /* DRM_PANFROST_PARAM_TEXTURE_FEATURES0 will return a bitmask of supported
234  * compressed formats, so we offer a helper to test if a format is supported */
235 
236 bool
panfrost_supports_compressed_format(struct panfrost_device * dev,unsigned fmt)237 panfrost_supports_compressed_format(struct panfrost_device *dev, unsigned fmt)
238 {
239         if (MALI_EXTRACT_TYPE(fmt) != MALI_FORMAT_COMPRESSED)
240                 return true;
241 
242         unsigned idx = fmt & ~MALI_FORMAT_COMPRESSED;
243         assert(idx < 32);
244 
245         return dev->compressed_formats & (1 << idx);
246 }
247 
248 /*
249  * To pipeline multiple tiles, a given tile may use at most half of the tile
250  * buffer. This function returns the optimal size (assuming pipelining).
251  *
252  * For Mali-G510 and Mali-G310, we will need extra logic to query the tilebuffer
253  * size for the particular variant. The CORE_FEATURES register might help.
254  */
255 static unsigned
panfrost_query_optimal_tib_size(const struct panfrost_device * dev)256 panfrost_query_optimal_tib_size(const struct panfrost_device *dev)
257 {
258         /* Preconditions ensure the returned value is a multiple of 1 KiB, the
259          * granularity of the colour buffer allocation field.
260          */
261         assert(dev->model->tilebuffer_size >= 2048);
262         assert(util_is_power_of_two_nonzero(dev->model->tilebuffer_size));
263 
264         return dev->model->tilebuffer_size / 2;
265 }
266 
267 /* Check for AFBC hardware support. AFBC is introduced in v5. Implementations
268  * may omit it, signaled as a nonzero value in the AFBC_FEATURES property. */
269 
270 static bool
panfrost_query_afbc(int fd,unsigned arch)271 panfrost_query_afbc(int fd, unsigned arch)
272 {
273         unsigned reg = panfrost_query_raw(fd,
274                                           DRM_PANFROST_PARAM_AFBC_FEATURES,
275                                           false, 0);
276 
277         return (arch >= 5) && (reg == 0);
278 }
279 
280 void
panfrost_open_device(void * memctx,int fd,struct panfrost_device * dev)281 panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev)
282 {
283         dev->fd = fd;
284         dev->memctx = memctx;
285         dev->gpu_id = panfrost_query_gpu_version(fd);
286         dev->arch = pan_arch(dev->gpu_id);
287         dev->kernel_version = drmGetVersion(fd);
288         dev->revision = panfrost_query_gpu_revision(fd);
289         dev->model = panfrost_get_model(dev->gpu_id);
290 
291         /* If we don't recognize the model, bail early */
292         if (!dev->model)
293                 return;
294 
295         dev->core_count = panfrost_query_core_count(fd, &dev->core_id_range);
296         dev->thread_tls_alloc = panfrost_query_thread_tls_alloc(fd, dev->arch);
297         dev->optimal_tib_size = panfrost_query_optimal_tib_size(dev);
298         dev->compressed_formats = panfrost_query_compressed_formats(fd);
299         dev->tiler_features = panfrost_query_tiler_features(fd);
300         dev->has_afbc = panfrost_query_afbc(fd, dev->arch);
301 
302         if (dev->arch <= 6)
303                 dev->formats = panfrost_pipe_format_v6;
304         else if (dev->arch <= 7)
305                 dev->formats = panfrost_pipe_format_v7;
306         else
307                 dev->formats = panfrost_pipe_format_v9;
308 
309         util_sparse_array_init(&dev->bo_map, sizeof(struct panfrost_bo), 512);
310 
311         pthread_mutex_init(&dev->bo_cache.lock, NULL);
312         list_inithead(&dev->bo_cache.lru);
313 
314         for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i)
315                 list_inithead(&dev->bo_cache.buckets[i]);
316 
317         /* Initialize pandecode before we start allocating */
318         if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
319                 pandecode_initialize(!(dev->debug & PAN_DBG_TRACE));
320 
321         /* Tiler heap is internally required by the tiler, which can only be
322          * active for a single job chain at once, so a single heap can be
323          * shared across batches/contextes */
324 
325         dev->tiler_heap = panfrost_bo_create(dev, 128 * 1024 * 1024,
326                         PAN_BO_INVISIBLE | PAN_BO_GROWABLE, "Tiler heap");
327 
328         pthread_mutex_init(&dev->submit_lock, NULL);
329 
330         /* Done once on init */
331         panfrost_upload_sample_positions(dev);
332 }
333 
334 void
panfrost_close_device(struct panfrost_device * dev)335 panfrost_close_device(struct panfrost_device *dev)
336 {
337         /* If we don't recognize the model, the rest of the device won't exist,
338          * we will have early-exited the device open.
339          */
340         if (dev->model) {
341                 pthread_mutex_destroy(&dev->submit_lock);
342                 panfrost_bo_unreference(dev->tiler_heap);
343                 panfrost_bo_cache_evict_all(dev);
344                 pthread_mutex_destroy(&dev->bo_cache.lock);
345                 util_sparse_array_finish(&dev->bo_map);
346         }
347 
348         drmFreeVersion(dev->kernel_version);
349         close(dev->fd);
350 }
351