• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 VMware, Inc.
3  * Copyright (C) 2014 Broadcom
4  * Copyright (C) 2018 Alyssa Rosenzweig
5  * Copyright (C) 2019 Collabora, Ltd.
6  * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25  * SOFTWARE.
26  *
27  */
28 
29 #include "draw/draw_context.h"
30 #include "pipe/p_defines.h"
31 #include "pipe/p_screen.h"
32 #include "util/format/u_format.h"
33 #include "util/format/u_format_s3tc.h"
34 #include "util/os_time.h"
35 #include "util/u_debug.h"
36 #include "util/u_memory.h"
37 #include "util/u_process.h"
38 #include "util/u_screen.h"
39 #include "util/u_video.h"
40 #include "util/xmlconfig.h"
41 
42 #include <fcntl.h>
43 
44 #include "drm-uapi/drm_fourcc.h"
45 #include "drm-uapi/panfrost_drm.h"
46 
47 #include "decode.h"
48 #include "pan_bo.h"
49 #include "pan_fence.h"
50 #include "pan_public.h"
51 #include "pan_resource.h"
52 #include "pan_screen.h"
53 #include "pan_shader.h"
54 #include "pan_texture.h"
55 #include "pan_util.h"
56 
57 #include "pan_context.h"
58 
59 #define DEFAULT_MAX_AFBC_PACKING_RATIO 90
60 
61 /* clang-format off */
62 static const struct debug_named_value panfrost_debug_options[] = {
63    {"perf",       PAN_DBG_PERF,     "Enable performance warnings"},
64    {"trace",      PAN_DBG_TRACE,    "Trace the command stream"},
65    {"dirty",      PAN_DBG_DIRTY,    "Always re-emit all state"},
66    {"sync",       PAN_DBG_SYNC,     "Wait for each job's completion and abort on GPU faults"},
67    {"nofp16",     PAN_DBG_NOFP16,    "Disable 16-bit support"},
68    {"gl3",        PAN_DBG_GL3,      "Enable experimental GL 3.x implementation, up to 3.3"},
69    {"noafbc",     PAN_DBG_NO_AFBC,  "Disable AFBC support"},
70    {"nocrc",      PAN_DBG_NO_CRC,   "Disable transaction elimination"},
71    {"msaa16",     PAN_DBG_MSAA16,   "Enable MSAA 8x and 16x support"},
72    {"linear",     PAN_DBG_LINEAR,   "Force linear textures"},
73    {"nocache",    PAN_DBG_NO_CACHE, "Disable BO cache"},
74    {"dump",       PAN_DBG_DUMP,     "Dump all graphics memory"},
75 #ifdef PAN_DBG_OVERFLOW
76    {"overflow",   PAN_DBG_OVERFLOW, "Check for buffer overflows in pool uploads"},
77 #endif
78    {"yuv",        PAN_DBG_YUV,      "Tint YUV textures with blue for 1-plane and green for 2-plane"},
79    {"forcepack",  PAN_DBG_FORCE_PACK,  "Force packing of AFBC textures on upload"},
80    {"cs",         PAN_DBG_CS,       "Enable extra checks in command stream"},
81    DEBUG_NAMED_VALUE_END
82 };
83 /* clang-format on */
84 
85 static const char *
panfrost_get_name(struct pipe_screen * screen)86 panfrost_get_name(struct pipe_screen *screen)
87 {
88    return pan_device(screen)->model->name;
89 }
90 
/* Return the driver vendor string. It is a fixed literal and does not depend
 * on the screen. */
static const char *
panfrost_get_vendor(struct pipe_screen *screen)
{
   (void)screen;

   return "Mesa";
}
96 
/* Return the hardware vendor string. It is a fixed literal and does not
 * depend on the screen. */
static const char *
panfrost_get_device_vendor(struct pipe_screen *screen)
{
   (void)screen;

   return "Arm";
}
102 
103 static int
from_kmod_group_allow_priority_flags(enum pan_kmod_group_allow_priority_flags kmod_flags)104 from_kmod_group_allow_priority_flags(
105    enum pan_kmod_group_allow_priority_flags kmod_flags)
106 {
107    int flags = 0;
108 
109    if (kmod_flags & PAN_KMOD_GROUP_ALLOW_PRIORITY_REALTIME)
110       flags |= PIPE_CONTEXT_PRIORITY_REALTIME;
111 
112    if (kmod_flags & PAN_KMOD_GROUP_ALLOW_PRIORITY_HIGH)
113       flags |= PIPE_CONTEXT_PRIORITY_HIGH;
114 
115    if (kmod_flags & PAN_KMOD_GROUP_ALLOW_PRIORITY_MEDIUM)
116       flags |= PIPE_CONTEXT_PRIORITY_MEDIUM;
117 
118    if (kmod_flags & PAN_KMOD_GROUP_ALLOW_PRIORITY_LOW)
119       flags |= PIPE_CONTEXT_PRIORITY_LOW;
120 
121    return flags;
122 }
123 
124 static int
panfrost_get_shader_param(struct pipe_screen * screen,enum pipe_shader_type shader,enum pipe_shader_cap param)125 panfrost_get_shader_param(struct pipe_screen *screen,
126                           enum pipe_shader_type shader,
127                           enum pipe_shader_cap param)
128 {
129    struct panfrost_device *dev = pan_device(screen);
130    bool is_nofp16 = dev->debug & PAN_DBG_NOFP16;
131 
132    switch (shader) {
133    case PIPE_SHADER_VERTEX:
134    case PIPE_SHADER_FRAGMENT:
135    case PIPE_SHADER_COMPUTE:
136       break;
137    default:
138       return 0;
139    }
140 
141    /* We only allow observable side effects (memory writes) in compute and
142     * fragment shaders. Side effects in the geometry pipeline cause
143     * trouble with IDVS and conflict with our transform feedback lowering.
144     */
145    bool allow_side_effects = (shader != PIPE_SHADER_VERTEX);
146 
147    switch (param) {
148    case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
149    case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
150    case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
151    case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
152       return 16384; /* arbitrary */
153 
154    case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
155       return 1024; /* arbitrary */
156 
157    case PIPE_SHADER_CAP_MAX_INPUTS:
158       /* Used as ABI on Midgard */
159       return 16;
160 
161    case PIPE_SHADER_CAP_MAX_OUTPUTS:
162       return shader == PIPE_SHADER_FRAGMENT ? 8 : PIPE_MAX_ATTRIBS;
163 
164    case PIPE_SHADER_CAP_MAX_TEMPS:
165       return 256; /* arbitrary */
166 
167    case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE:
168       return 16 * 1024 * sizeof(float);
169 
170    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
171       STATIC_ASSERT(PAN_MAX_CONST_BUFFERS < 0x100);
172       return PAN_MAX_CONST_BUFFERS;
173 
174    case PIPE_SHADER_CAP_CONT_SUPPORTED:
175       return 0;
176 
177    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
178       return dev->arch >= 6;
179 
180    case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
181       return 1;
182 
183    case PIPE_SHADER_CAP_SUBROUTINES:
184       return 0;
185 
186    case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
187       return 0;
188 
189    case PIPE_SHADER_CAP_INTEGERS:
190       return 1;
191 
192       /* The Bifrost compiler supports full 16-bit. Midgard could but int16
193        * support is untested, so restrict INT16 to Bifrost. Midgard
194        * architecturally cannot support fp16 derivatives. */
195 
196    case PIPE_SHADER_CAP_FP16:
197    case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
198       return !is_nofp16;
199    case PIPE_SHADER_CAP_FP16_DERIVATIVES:
200    case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
201       return dev->arch >= 6 && !is_nofp16;
202    case PIPE_SHADER_CAP_INT16:
203       /* Blocked on https://gitlab.freedesktop.org/mesa/mesa/-/issues/6075 */
204       return false;
205 
206    case PIPE_SHADER_CAP_INT64_ATOMICS:
207    case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
208       return 0;
209 
210    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
211       STATIC_ASSERT(PIPE_MAX_SAMPLERS < 0x10000);
212       return PIPE_MAX_SAMPLERS;
213 
214    case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
215       STATIC_ASSERT(PIPE_MAX_SHADER_SAMPLER_VIEWS < 0x10000);
216       return PIPE_MAX_SHADER_SAMPLER_VIEWS;
217 
218    case PIPE_SHADER_CAP_SUPPORTED_IRS:
219       return (1 << PIPE_SHADER_IR_NIR);
220 
221    case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
222       return allow_side_effects ? 16 : 0;
223 
224    case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
225       return allow_side_effects ? PIPE_MAX_SHADER_IMAGES : 0;
226 
227    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
228    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
229       return 0;
230 
231    default:
232       return 0;
233    }
234 
235    return 0;
236 }
237 
238 static uint32_t
pipe_to_pan_bind_flags(uint32_t pipe_bind_flags)239 pipe_to_pan_bind_flags(uint32_t pipe_bind_flags)
240 {
241    static_assert(PIPE_BIND_DEPTH_STENCIL == PAN_BIND_DEPTH_STENCIL, "");
242    static_assert(PIPE_BIND_RENDER_TARGET == PAN_BIND_RENDER_TARGET, "");
243    static_assert(PIPE_BIND_SAMPLER_VIEW == PAN_BIND_SAMPLER_VIEW, "");
244    static_assert(PIPE_BIND_VERTEX_BUFFER == PAN_BIND_VERTEX_BUFFER, "");
245 
246    return pipe_bind_flags & (PAN_BIND_DEPTH_STENCIL | PAN_BIND_RENDER_TARGET |
247                              PAN_BIND_VERTEX_BUFFER | PAN_BIND_SAMPLER_VIEW);
248 }
249 
250 /**
251  * Query format support for creating a texture, drawing surface, etc.
252  * \param format  the format to test
253  * \param type  one of PIPE_TEXTURE, PIPE_SURFACE
254  */
255 static bool
panfrost_is_format_supported(struct pipe_screen * screen,enum pipe_format format,enum pipe_texture_target target,unsigned sample_count,unsigned storage_sample_count,unsigned bind)256 panfrost_is_format_supported(struct pipe_screen *screen,
257                              enum pipe_format format,
258                              enum pipe_texture_target target,
259                              unsigned sample_count,
260                              unsigned storage_sample_count, unsigned bind)
261 {
262    struct panfrost_device *dev = pan_device(screen);
263 
264    /* MSAA 2x gets rounded up to 4x. MSAA 8x/16x only supported on v5+.
265     * TODO: debug MSAA 8x/16x */
266 
267    switch (sample_count) {
268    case 0:
269    case 1:
270    case 4:
271       break;
272    case 8:
273    case 16:
274       if (dev->debug & PAN_DBG_MSAA16)
275          break;
276       else
277          return false;
278    default:
279       return false;
280    }
281 
282    if (MAX2(sample_count, 1) != MAX2(storage_sample_count, 1))
283       return false;
284 
285    /* Z16 causes dEQP failures on t720 */
286    if (format == PIPE_FORMAT_Z16_UNORM && dev->arch <= 4)
287       return false;
288 
289    /* Check we support the format with the given bind */
290 
291    unsigned pan_bind_flags = pipe_to_pan_bind_flags(bind);
292    struct panfrost_format fmt = dev->formats[format];
293    unsigned fmt_bind_flags = fmt.bind;
294 
295    /* Also check that compressed texture formats are supported on this
296     * particular chip. They may not be depending on system integration
297     * differences. */
298 
299    bool supported =
300       !util_format_is_compressed(format) ||
301       panfrost_supports_compressed_format(dev, fmt.texfeat_bit);
302 
303    if (!supported)
304       return false;
305 
306    if (bind & PIPE_BIND_DEPTH_STENCIL) {
307       /* On panfrost, S8_UINT is actually stored as X8S8_UINT, which
308        * causes us headaches when we try to bind it as DEPTH_STENCIL;
309        * the gallium driver doesn't handle this correctly. So reject
310        * it for now.
311        */
312       switch (format) {
313       case PIPE_FORMAT_S8_UINT:
314          fmt_bind_flags &= ~PAN_BIND_DEPTH_STENCIL;
315          break;
316       default:
317          /* no other special handling required yet */
318          break;
319       }
320    }
321 
322    return MALI_EXTRACT_INDEX(fmt.hw) &&
323       ((pan_bind_flags & ~fmt_bind_flags) == 0);
324 }
325 
326 static void
panfrost_query_compression_rates(struct pipe_screen * screen,enum pipe_format format,int max,uint32_t * rates,int * count)327 panfrost_query_compression_rates(struct pipe_screen *screen,
328                                  enum pipe_format format, int max,
329                                  uint32_t *rates, int *count)
330 {
331    struct panfrost_device *dev = pan_device(screen);
332 
333    if (!dev->has_afrc) {
334       *count = 0;
335       return;
336    }
337 
338    *count = panfrost_afrc_query_rates(format, max, rates);
339 }
340 
341 /* We always support linear and tiled operations, both external and internal.
342  * We support AFBC for a subset of formats, and colourspace transform for a
343  * subset of those. */
344 
345 static void
panfrost_walk_dmabuf_modifiers(struct pipe_screen * screen,enum pipe_format format,int max,uint64_t * modifiers,unsigned int * external_only,int * out_count,uint64_t test_modifier,bool allow_afrc)346 panfrost_walk_dmabuf_modifiers(struct pipe_screen *screen,
347                                enum pipe_format format, int max,
348                                uint64_t *modifiers, unsigned int *external_only,
349                                int *out_count, uint64_t test_modifier, bool allow_afrc)
350 {
351    /* Query AFBC status */
352    struct panfrost_device *dev = pan_device(screen);
353    bool afbc =
354       dev->has_afbc && panfrost_format_supports_afbc(dev->arch, format);
355    bool ytr = panfrost_afbc_can_ytr(format);
356    bool tiled_afbc = panfrost_afbc_can_tile(dev->arch);
357    bool afrc = allow_afrc && dev->has_afrc && panfrost_format_supports_afrc(format);
358 
359    unsigned count = 0;
360 
361    for (unsigned i = 0; i < PAN_MODIFIER_COUNT; ++i) {
362       if (drm_is_afbc(pan_best_modifiers[i])) {
363          if (!afbc)
364             continue;
365 
366          if ((pan_best_modifiers[i] & AFBC_FORMAT_MOD_SPLIT) &&
367              !panfrost_afbc_can_split(dev->arch, format, pan_best_modifiers[i]))
368             continue;
369 
370          if ((pan_best_modifiers[i] & AFBC_FORMAT_MOD_YTR) && !ytr)
371             continue;
372 
373          if ((pan_best_modifiers[i] & AFBC_FORMAT_MOD_TILED) && !tiled_afbc)
374             continue;
375       }
376 
377       if (drm_is_afrc(pan_best_modifiers[i]) && !afrc)
378          continue;
379 
380       if (drm_is_mtk_tiled(format, pan_best_modifiers[i]) &&
381           !panfrost_format_supports_mtk_tiled(format))
382          continue;
383 
384       if (test_modifier != DRM_FORMAT_MOD_INVALID &&
385           test_modifier != pan_best_modifiers[i])
386          continue;
387 
388       if (max > (int)count) {
389          modifiers[count] = pan_best_modifiers[i];
390 
391          if (external_only)
392             external_only[count] = drm_is_mtk_tiled(format, modifiers[count]);
393       }
394       count++;
395    }
396 
397    *out_count = count;
398 }
399 
400 static void
panfrost_query_dmabuf_modifiers(struct pipe_screen * screen,enum pipe_format format,int max,uint64_t * modifiers,unsigned int * external_only,int * out_count)401 panfrost_query_dmabuf_modifiers(struct pipe_screen *screen,
402                                 enum pipe_format format, int max,
403                                 uint64_t *modifiers,
404                                 unsigned int *external_only, int *out_count)
405 {
406    panfrost_walk_dmabuf_modifiers(screen, format, max, modifiers, external_only,
407                                   out_count, DRM_FORMAT_MOD_INVALID, true);
408 }
409 
410 static void
panfrost_query_compression_modifiers(struct pipe_screen * screen,enum pipe_format format,uint32_t rate,int max,uint64_t * modifiers,int * count)411 panfrost_query_compression_modifiers(struct pipe_screen *screen,
412                                      enum pipe_format format, uint32_t rate,
413                                      int max, uint64_t *modifiers, int *count)
414 {
415    struct panfrost_device *dev = pan_device(screen);
416 
417    if (rate == PIPE_COMPRESSION_FIXED_RATE_NONE)
418       /* no compression requested, return all non-afrc formats */
419       panfrost_walk_dmabuf_modifiers(screen, format, max, modifiers,
420                                      NULL, /* external_only */
421                                      count,
422                                      DRM_FORMAT_MOD_INVALID,
423                                      false /* disallow afrc */);
424    else if (dev->has_afrc)
425       *count = panfrost_afrc_get_modifiers(format, rate, max, modifiers);
426    else
427       *count = 0;  /* compression requested but not supported */
428 }
429 
430 static bool
panfrost_is_dmabuf_modifier_supported(struct pipe_screen * screen,uint64_t modifier,enum pipe_format format,bool * external_only)431 panfrost_is_dmabuf_modifier_supported(struct pipe_screen *screen,
432                                       uint64_t modifier,
433                                       enum pipe_format format,
434                                       bool *external_only)
435 {
436    uint64_t unused;
437    unsigned int uint_extern_only = 0;
438    int count;
439 
440    panfrost_walk_dmabuf_modifiers(screen, format, 1, &unused, &uint_extern_only,
441                                   &count, modifier, true);
442 
443    if (external_only)
444       *external_only = uint_extern_only ? true : false;
445 
446    return count > 0;
447 }
448 
449 static int
panfrost_get_compute_param(struct pipe_screen * pscreen,enum pipe_shader_ir ir_type,enum pipe_compute_cap param,void * ret)450 panfrost_get_compute_param(struct pipe_screen *pscreen,
451                            enum pipe_shader_ir ir_type,
452                            enum pipe_compute_cap param, void *ret)
453 {
454    struct panfrost_device *dev = pan_device(pscreen);
455    const char *const ir = "panfrost";
456 
457 #define RET(x)                                                                 \
458    do {                                                                        \
459       if (ret)                                                                 \
460          memcpy(ret, x, sizeof(x));                                            \
461       return sizeof(x);                                                        \
462    } while (0)
463 
464    switch (param) {
465    case PIPE_COMPUTE_CAP_ADDRESS_BITS:
466       RET((uint32_t[]){64});
467 
468    case PIPE_COMPUTE_CAP_IR_TARGET:
469       if (ret)
470          sprintf(ret, "%s", ir);
471       return strlen(ir) * sizeof(char);
472 
473    case PIPE_COMPUTE_CAP_GRID_DIMENSION:
474       RET((uint64_t[]){3});
475 
476    case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
477       RET(((uint64_t[]){65535, 65535, 65535}));
478 
479    case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
480       /* Unpredictable behaviour at larger sizes. Mali-G52 advertises
481        * 384x384x384.
482        *
483        * On Midgard, we don't allow more than 128 threads in each
484        * direction to match PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK.
485        * That still exceeds the minimum-maximum.
486        */
487       if (dev->arch >= 6)
488          RET(((uint64_t[]){256, 256, 256}));
489       else
490          RET(((uint64_t[]){128, 128, 128}));
491 
492    case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
493       /* On Bifrost and newer, all GPUs can support at least 256 threads
494        * regardless of register usage, so we report 256.
495        *
496        * On Midgard, with maximum register usage, the maximum
497        * thread count is only 64. We would like to report 64 here, but
498        * the GLES3.1 spec minimum is 128, so we report 128 and limit
499        * the register allocation of affected compute kernels.
500        */
501       RET((uint64_t[]){dev->arch >= 6 ? 256 : 128});
502 
503    case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
504    case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: {
505       uint64_t total_ram;
506 
507       if (!os_get_total_physical_memory(&total_ram))
508          return 0;
509 
510       /* We don't want to burn too much ram with the GPU. If the user has 4GiB
511        * or less, we use at most half. If they have more than 4GiB, we use 3/4.
512        */
513       uint64_t available_ram;
514       if (total_ram <= 4ull * 1024 * 1024 * 1024)
515          available_ram = total_ram / 2;
516       else
517          available_ram = total_ram * 3 / 4;
518 
519       /* 48bit address space max, with the lower 32MB reserved. We clamp
520        * things so it matches kmod VA range limitations.
521        */
522       uint64_t user_va_start =
523          panfrost_clamp_to_usable_va_range(dev->kmod.dev, PAN_VA_USER_START);
524       uint64_t user_va_end =
525          panfrost_clamp_to_usable_va_range(dev->kmod.dev, PAN_VA_USER_END);
526 
527       /* We cannot support more than the VA limit */
528       RET((uint64_t[]){MIN2(available_ram, user_va_end - user_va_start)});
529    }
530 
531    case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
532       RET((uint64_t[]){32768});
533 
534    case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
535    case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
536       RET((uint64_t[]){4096});
537 
538    case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
539       RET((uint32_t[]){800 /* MHz -- TODO */});
540 
541    case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
542       RET((uint32_t[]){dev->core_count});
543 
544    case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
545       RET((uint32_t[]){1});
546 
547    case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
548       RET((uint32_t[]){pan_subgroup_size(dev->arch)});
549 
550    case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
551       RET((uint32_t[]){0 /* TODO */});
552 
553    case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
554       RET((uint64_t[]){1024}); // TODO
555    }
556 
557    return 0;
558 }
559 
560 static void
panfrost_init_screen_caps(struct panfrost_screen * screen)561 panfrost_init_screen_caps(struct panfrost_screen *screen)
562 {
563    struct pipe_caps *caps = (struct pipe_caps *)&screen->base.caps;
564 
565    u_init_pipe_screen_caps(&screen->base, 1);
566 
567    struct panfrost_device *dev = &screen->dev;
568 
569    /* Our GL 3.x implementation is WIP */
570    bool is_gl3 = dev->debug & PAN_DBG_GL3;
571 
572    /* Native MRT is introduced with v5 */
573    bool has_mrt = (dev->arch >= 5);
574 
575    caps->npot_textures = true;
576    caps->mixed_color_depth_bits = true;
577    caps->fragment_shader_texture_lod = true;
578    caps->vertex_color_unclamped = true;
579    caps->depth_clip_disable = true;
580    caps->mixed_framebuffer_sizes = true;
581    caps->frontend_noop = true;
582    caps->sample_shading = true;
583    caps->fragment_shader_derivatives = true;
584    caps->framebuffer_no_attachment = true;
585    caps->quads_follow_provoking_vertex_convention = true;
586    caps->shader_pack_half_float = true;
587    caps->has_const_bw = true;
588 
589    /* Removed in v9 (Valhall) */
590    caps->depth_clip_disable_separate = dev->arch < 9;
591 
592    caps->max_render_targets =
593    caps->fbfetch = has_mrt ? 8 : 1;
594    caps->fbfetch_coherent = true;
595 
596    caps->max_dual_source_render_targets = 1;
597 
598    caps->occlusion_query = true;
599    caps->primitive_restart = true;
600    caps->primitive_restart_fixed_index = true;
601 
602    caps->anisotropic_filter =
603       panfrost_device_gpu_rev(dev) >= dev->model->min_rev_anisotropic;
604 
605    /* Compile side is done for Bifrost, Midgard TODO. Needs some kernel
606     * work to turn on, since CYCLE_COUNT_START needs to be issued. In
607     * kbase, userspace requests this via BASE_JD_REQ_PERMON. There is not
608     * yet way to request this with mainline TODO */
609    caps->shader_clock = false;
610 
611    caps->vs_instanceid = true;
612    caps->texture_multisample = true;
613    caps->surface_sample_count = true;
614 
615    caps->sampler_view_target = true;
616    caps->clip_halfz = true;
617    caps->polygon_offset_clamp = true;
618    caps->texture_swizzle = true;
619    caps->texture_mirror_clamp_to_edge = true;
620    caps->vertex_element_instance_divisor = true;
621    caps->blend_equation_separate = true;
622    caps->indep_blend_enable = true;
623    caps->indep_blend_func = true;
624    caps->generate_mipmap = true;
625    caps->uma = true;
626    caps->texture_float_linear = true;
627    caps->texture_half_float_linear = true;
628    caps->shader_array_components = true;
629    caps->texture_buffer_objects = true;
630    caps->packed_uniforms = true;
631    caps->image_load_formatted = true;
632    caps->cube_map_array = true;
633    caps->compute = true;
634    caps->int64 = true;
635 
636    caps->copy_between_compressed_and_plain_formats = true;
637 
638    caps->max_stream_output_buffers = PIPE_MAX_SO_BUFFERS;
639 
640    caps->max_stream_output_separate_components =
641    caps->max_stream_output_interleaved_components = PIPE_MAX_SO_OUTPUTS;
642 
643    caps->stream_output_pause_resume = true;
644    caps->stream_output_interleave_buffers = true;
645 
646    caps->max_texture_array_layers = 2048;
647 
648    caps->glsl_feature_level =
649    caps->glsl_feature_level_compatibility = is_gl3 ? 330 : 140;
650    caps->essl_feature_level = dev->arch >= 6 ? 320 : 310;
651 
652    caps->constant_buffer_offset_alignment = 16;
653 
654    /* v7 (only) restricts component orders with AFBC. To workaround, we
655     * compose format swizzles with texture swizzles. pan_texture.c motsly
656     * handles this but we need to fix up the border colour.
657     */
658    caps->texture_border_color_quirk = dev->arch == 7 || dev->arch >= 10 ?
659       PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_FREEDRENO : 0;
660 
661    caps->max_texel_buffer_elements = PAN_MAX_TEXEL_BUFFER_ELEMENTS;
662 
663    /* Must be at least 64 for correct behaviour */
664    caps->texture_buffer_offset_alignment = 64;
665 
666    caps->query_time_elapsed =
667    caps->query_timestamp =
668       dev->kmod.props.gpu_can_query_timestamp &&
669       dev->kmod.props.timestamp_frequency != 0;
670 
671    if (caps->query_timestamp)
672       caps->timer_resolution = pan_gpu_time_to_ns(dev, 1);
673 
674    /* The hardware requires element alignment for data conversion to work
675     * as expected. If data conversion is not required, this restriction is
676     * lifted on Midgard at a performance penalty. We conservatively
677     * require element alignment for vertex buffers, using u_vbuf to
678     * translate to match the hardware requirement.
679     *
680     * This is less heavy-handed than PIPE_VERTEX_INPUT_ALIGNMENT_4BYTE, which
681     * would needlessly require alignment even for 8-bit formats.
682     */
683    caps->vertex_input_alignment = PIPE_VERTEX_INPUT_ALIGNMENT_ELEMENT;
684 
685    caps->max_texture_2d_size = 1 << (PAN_MAX_MIP_LEVELS - 1);
686 
687    caps->max_texture_3d_levels =
688    caps->max_texture_cube_levels = PAN_MAX_MIP_LEVELS;
689 
690    /* pixel coord is in integer sysval on bifrost. */
691    caps->fs_coord_pixel_center_integer = dev->arch >= 6;
692    caps->fs_coord_pixel_center_half_integer = dev->arch < 6;
693 
694    /* Hardware is upper left */
695    caps->fs_coord_origin_lower_left = false;
696 
697    caps->fs_coord_origin_upper_left = true;
698    caps->tgsi_texcoord = true;
699 
700    /* We would prefer varyings on Midgard, but proper sysvals on Bifrost */
701    caps->fs_face_is_integer_sysval =
702    caps->fs_position_is_sysval =
703    caps->fs_point_is_sysval = dev->arch >= 6;
704 
705    caps->seamless_cube_map = true;
706    caps->seamless_cube_map_per_texture = true;
707 
708    caps->max_vertex_element_src_offset = 0xffff;
709 
710    caps->texture_transfer_modes = 0;
711 
712    caps->endianness = PIPE_ENDIAN_NATIVE;
713 
714    caps->max_texture_gather_components = 4;
715 
716    caps->min_texture_gather_offset = -8;
717 
718    caps->max_texture_gather_offset = 7;
719 
720    uint64_t system_memory;
721    caps->video_memory = os_get_total_physical_memory(&system_memory) ?
722       system_memory >> 20 : 0;
723 
724    caps->shader_stencil_export = true;
725    caps->conditional_render = true;
726    caps->conditional_render_inverted = true;
727 
728    caps->shader_buffer_offset_alignment = 4;
729 
730    caps->max_varyings = dev->arch >= 9 ? 16 : 32;
731 
732    /* Removed in v6 (Bifrost) */
733    caps->gl_clamp =
734    caps->texture_mirror_clamp =
735    caps->alpha_test = dev->arch <= 5;
736 
737    /* Removed in v9 (Valhall). PRIMTIIVE_RESTART_FIXED_INDEX is of course
738     * still supported as it is core GLES3.0 functionality
739     */
740    caps->emulate_nonfixed_primitive_restart = dev->arch >= 9;
741 
742    caps->flatshade = false;
743    caps->two_sided_color = false;
744    caps->clip_planes = 0;
745 
746    caps->packed_stream_output = false;
747 
748    caps->viewport_transform_lowered = true;
749    caps->psiz_clamped = true;
750 
751    caps->nir_images_as_deref = false;
752 
753    caps->draw_indirect = true;
754 
755    caps->multi_draw_indirect = dev->arch >= 10;
756 
757    caps->start_instance =
758    caps->draw_parameters = pan_is_bifrost(dev);
759 
760    /* Mali supports GLES and QUADS. Midgard and v6 Bifrost
761     * support more */
762    uint32_t modes = BITFIELD_MASK(MESA_PRIM_QUADS + 1);
763 
764    if (dev->arch <= 6) {
765       modes |= BITFIELD_BIT(MESA_PRIM_QUAD_STRIP);
766       modes |= BITFIELD_BIT(MESA_PRIM_POLYGON);
767    }
768 
769    if (dev->arch >= 9) {
770       /* Although Valhall is supposed to support quads, they
771        * don't seem to work correctly. Disable to fix
772        * arb-provoking-vertex-render.
773        */
774       modes &= ~BITFIELD_BIT(MESA_PRIM_QUADS);
775    }
776 
777    caps->supported_prim_modes =
778    caps->supported_prim_modes_with_restart = modes;
779 
780    caps->image_store_formatted = true;
781 
782    caps->native_fence_fd = true;
783 
784    caps->context_priority_mask =
785       from_kmod_group_allow_priority_flags(
786          dev->kmod.props.allowed_group_priorities_mask);
787 
788    caps->astc_decode_mode = dev->arch >= 9 && (dev->compressed_formats & (1 << 30));
789 
790    caps->min_line_width =
791    caps->min_line_width_aa =
792    caps->min_point_size =
793    caps->min_point_size_aa = 1;
794 
795    caps->point_size_granularity =
796    caps->line_width_granularity = 0.0625;
797 
798    caps->max_line_width =
799    caps->max_line_width_aa =
800    caps->max_point_size =
801    caps->max_point_size_aa = 4095.9375;
802 
803    caps->max_texture_anisotropy = 16.0;
804 
805    caps->max_texture_lod_bias = 16.0; /* arbitrary */
806 }
807 
808 static void
panfrost_destroy_screen(struct pipe_screen * pscreen)809 panfrost_destroy_screen(struct pipe_screen *pscreen)
810 {
811    struct panfrost_device *dev = pan_device(pscreen);
812    struct panfrost_screen *screen = pan_screen(pscreen);
813 
814    panfrost_resource_screen_destroy(pscreen);
815    panfrost_pool_cleanup(&screen->mempools.bin);
816    panfrost_pool_cleanup(&screen->mempools.desc);
817    pan_blend_shader_cache_cleanup(&dev->blend_shaders);
818 
819    if (screen->vtbl.screen_destroy)
820       screen->vtbl.screen_destroy(pscreen);
821 
822    if (dev->ro)
823       dev->ro->destroy(dev->ro);
824    panfrost_close_device(dev);
825 
826    disk_cache_destroy(screen->disk_cache);
827    ralloc_free(pscreen);
828 }
829 
830 static const void *
panfrost_screen_get_compiler_options(struct pipe_screen * pscreen,enum pipe_shader_ir ir,enum pipe_shader_type shader)831 panfrost_screen_get_compiler_options(struct pipe_screen *pscreen,
832                                      enum pipe_shader_ir ir,
833                                      enum pipe_shader_type shader)
834 {
835    return pan_screen(pscreen)->vtbl.get_compiler_options();
836 }
837 
838 static struct disk_cache *
panfrost_get_disk_shader_cache(struct pipe_screen * pscreen)839 panfrost_get_disk_shader_cache(struct pipe_screen *pscreen)
840 {
841    return pan_screen(pscreen)->disk_cache;
842 }
843 
/**
 * pipe_screen::get_screen_fd hook.
 *
 * Returns whatever panfrost_device_fd() reports for the device backing
 * this screen.
 */
static int
panfrost_get_screen_fd(struct pipe_screen *pscreen)
{
   struct panfrost_device *dev = pan_device(pscreen);

   return panfrost_device_fd(dev);
}
849 
850 int
panfrost_get_driver_query_info(struct pipe_screen * pscreen,unsigned index,struct pipe_driver_query_info * info)851 panfrost_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
852                                struct pipe_driver_query_info *info)
853 {
854    int num_queries = ARRAY_SIZE(panfrost_driver_query_list);
855 
856    if (!info)
857       return num_queries;
858 
859    if (index >= num_queries)
860       return 0;
861 
862    *info = panfrost_driver_query_list[index];
863 
864    return 1;
865 }
866 
867 static uint64_t
panfrost_get_timestamp(struct pipe_screen * pscreen)868 panfrost_get_timestamp(struct pipe_screen *pscreen)
869 {
870    struct panfrost_device *dev = pan_device(pscreen);
871 
872    return pan_gpu_time_to_ns(dev, pan_kmod_query_timestamp(dev->kmod.dev));
873 }
874 
875 struct pipe_screen *
panfrost_create_screen(int fd,const struct pipe_screen_config * config,struct renderonly * ro)876 panfrost_create_screen(int fd, const struct pipe_screen_config *config,
877                        struct renderonly *ro)
878 {
879    /* Create the screen */
880    struct panfrost_screen *screen = rzalloc(NULL, struct panfrost_screen);
881 
882    if (!screen)
883       return NULL;
884 
885    struct panfrost_device *dev = pan_device(&screen->base);
886 
887    driParseConfigFiles(config->options, config->options_info, 0,
888                        "panfrost", NULL, NULL, NULL, 0, NULL, 0);
889 
890    /* Debug must be set first for pandecode to work correctly */
891    dev->debug =
892       debug_get_flags_option("PAN_MESA_DEBUG", panfrost_debug_options, 0);
893    screen->max_afbc_packing_ratio = debug_get_num_option(
894       "PAN_MAX_AFBC_PACKING_RATIO", DEFAULT_MAX_AFBC_PACKING_RATIO);
895 
896    if (panfrost_open_device(screen, fd, dev)) {
897       ralloc_free(screen);
898       return NULL;
899    }
900 
901    if (dev->debug & PAN_DBG_NO_AFBC)
902       dev->has_afbc = false;
903 
904    /* Bail early on unsupported hardware */
905    if (dev->model == NULL) {
906       debug_printf("panfrost: Unsupported model %X",
907                    panfrost_device_gpu_id(dev));
908       panfrost_destroy_screen(&(screen->base));
909       return NULL;
910    }
911 
912    screen->force_afbc_packing = dev->debug & PAN_DBG_FORCE_PACK;
913    if (!screen->force_afbc_packing)
914       screen->force_afbc_packing = driQueryOptionb(config->options,
915                                                    "pan_force_afbc_packing");
916 
917    const char *option = debug_get_option("PAN_AFRC_RATE", NULL);
918    if (!option) {
919       screen->force_afrc_rate = -1;
920    } else if (strcmp(option, "default") == 0) {
921       screen->force_afrc_rate = PIPE_COMPRESSION_FIXED_RATE_DEFAULT;
922    } else {
923       int64_t rate =
924          debug_parse_num_option(option, PIPE_COMPRESSION_FIXED_RATE_NONE);
925       screen->force_afrc_rate = rate;
926    }
927 
928    screen->csf_tiler_heap.chunk_size = driQueryOptioni(config->options,
929                                                        "pan_csf_chunk_size");
930    screen->csf_tiler_heap.initial_chunks = driQueryOptioni(config->options,
931                                                            "pan_csf_initial_chunks");
932    screen->csf_tiler_heap.max_chunks = driQueryOptioni(config->options,
933                                                        "pan_csf_max_chunks");
934 
935    dev->ro = ro;
936 
937    screen->base.destroy = panfrost_destroy_screen;
938 
939    screen->base.get_screen_fd = panfrost_get_screen_fd;
940    screen->base.get_name = panfrost_get_name;
941    screen->base.get_vendor = panfrost_get_vendor;
942    screen->base.get_device_vendor = panfrost_get_device_vendor;
943    screen->base.get_driver_query_info = panfrost_get_driver_query_info;
944    screen->base.get_shader_param = panfrost_get_shader_param;
945    screen->base.get_compute_param = panfrost_get_compute_param;
946    screen->base.get_timestamp = panfrost_get_timestamp;
947    screen->base.is_format_supported = panfrost_is_format_supported;
948    screen->base.query_dmabuf_modifiers = panfrost_query_dmabuf_modifiers;
949    screen->base.is_dmabuf_modifier_supported =
950       panfrost_is_dmabuf_modifier_supported;
951    screen->base.context_create = panfrost_create_context;
952    screen->base.get_compiler_options = panfrost_screen_get_compiler_options;
953    screen->base.get_disk_shader_cache = panfrost_get_disk_shader_cache;
954    screen->base.fence_reference = panfrost_fence_reference;
955    screen->base.fence_finish = panfrost_fence_finish;
956    screen->base.fence_get_fd = panfrost_fence_get_fd;
957    screen->base.set_damage_region = panfrost_resource_set_damage_region;
958    screen->base.query_compression_rates = panfrost_query_compression_rates;
959    screen->base.query_compression_modifiers =
960       panfrost_query_compression_modifiers;
961 
962    panfrost_resource_screen_init(&screen->base);
963    pan_blend_shader_cache_init(&dev->blend_shaders,
964                                panfrost_device_gpu_id(dev));
965 
966    panfrost_init_screen_caps(screen);
967 
968    panfrost_disk_cache_init(screen);
969 
970    if (panfrost_pool_init(&screen->mempools.bin, NULL, dev, PAN_BO_EXECUTE,
971                           4096, "Preload shaders", false, true) ||
972        panfrost_pool_init(&screen->mempools.desc, NULL, dev, 0, 65536,
973                           "Preload RSDs", false, true)) {
974       panfrost_destroy_screen(&(screen->base));
975       return NULL;
976    }
977 
978    if (dev->arch == 4)
979       panfrost_cmdstream_screen_init_v4(screen);
980    else if (dev->arch == 5)
981       panfrost_cmdstream_screen_init_v5(screen);
982    else if (dev->arch == 6)
983       panfrost_cmdstream_screen_init_v6(screen);
984    else if (dev->arch == 7)
985       panfrost_cmdstream_screen_init_v7(screen);
986    else if (dev->arch == 9)
987       panfrost_cmdstream_screen_init_v9(screen);
988    else if (dev->arch == 10)
989       panfrost_cmdstream_screen_init_v10(screen);
990    else
991       unreachable("Unhandled architecture major");
992 
993    return &screen->base;
994 }
995