• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 VMware, Inc.
3  * Copyright (C) 2014 Broadcom
4  * Copyright (C) 2018 Alyssa Rosenzweig
5  * Copyright (C) 2019 Collabora, Ltd.
6  * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25  * SOFTWARE.
26  *
27  */
28 
29 #include "draw/draw_context.h"
30 #include "pipe/p_defines.h"
31 #include "pipe/p_screen.h"
32 #include "util/format/u_format.h"
33 #include "util/format/u_format_s3tc.h"
34 #include "util/os_time.h"
35 #include "util/u_debug.h"
36 #include "util/u_memory.h"
37 #include "util/u_process.h"
38 #include "util/u_screen.h"
39 #include "util/u_video.h"
40 #include "util/xmlconfig.h"
41 
42 #include <fcntl.h>
43 
44 #include "drm-uapi/drm_fourcc.h"
45 #include "drm-uapi/panfrost_drm.h"
46 
47 #include "decode.h"
48 #include "pan_bo.h"
49 #include "pan_fence.h"
50 #include "pan_public.h"
51 #include "pan_resource.h"
52 #include "pan_screen.h"
53 #include "pan_shader.h"
54 #include "pan_util.h"
55 
56 #include "pan_context.h"
57 
58 #define DEFAULT_MAX_AFBC_PACKING_RATIO 90
59 
60 /* clang-format off */
61 static const struct debug_named_value panfrost_debug_options[] = {
62    {"perf",       PAN_DBG_PERF,     "Enable performance warnings"},
63    {"trace",      PAN_DBG_TRACE,    "Trace the command stream"},
64    {"dirty",      PAN_DBG_DIRTY,    "Always re-emit all state"},
65    {"sync",       PAN_DBG_SYNC,     "Wait for each job's completion and abort on GPU faults"},
66    {"nofp16",     PAN_DBG_NOFP16,    "Disable 16-bit support"},
67    {"gl3",        PAN_DBG_GL3,      "Enable experimental GL 3.x implementation, up to 3.3"},
68    {"noafbc",     PAN_DBG_NO_AFBC,  "Disable AFBC support"},
69    {"nocrc",      PAN_DBG_NO_CRC,   "Disable transaction elimination"},
70    {"msaa16",     PAN_DBG_MSAA16,   "Enable MSAA 8x and 16x support"},
71    {"linear",     PAN_DBG_LINEAR,   "Force linear textures"},
72    {"nocache",    PAN_DBG_NO_CACHE, "Disable BO cache"},
73    {"dump",       PAN_DBG_DUMP,     "Dump all graphics memory"},
74 #ifdef PAN_DBG_OVERFLOW
75    {"overflow",   PAN_DBG_OVERFLOW, "Check for buffer overflows in pool uploads"},
76 #endif
77    {"yuv",        PAN_DBG_YUV,      "Tint YUV textures with blue for 1-plane and green for 2-plane"},
78    {"forcepack",  PAN_DBG_FORCE_PACK,  "Force packing of AFBC textures on upload"},
79    {"cs",         PAN_DBG_CS,       "Enable extra checks in command stream"},
80    DEBUG_NAMED_VALUE_END
81 };
82 /* clang-format on */
83 
84 static const char *
panfrost_get_name(struct pipe_screen * screen)85 panfrost_get_name(struct pipe_screen *screen)
86 {
87    return pan_device(screen)->model->name;
88 }
89 
/* Return the driver vendor string. The screen argument is not consulted. */
static const char *
panfrost_get_vendor(struct pipe_screen *screen)
{
   (void)screen;

   return "Mesa";
}
95 
/* Return the hardware vendor string. The screen argument is not consulted. */
static const char *
panfrost_get_device_vendor(struct pipe_screen *screen)
{
   (void)screen;

   return "Arm";
}
101 
102 static int
from_kmod_group_allow_priority_flags(enum pan_kmod_group_allow_priority_flags kmod_flags)103 from_kmod_group_allow_priority_flags(
104    enum pan_kmod_group_allow_priority_flags kmod_flags)
105 {
106    int flags = 0;
107 
108    if (kmod_flags & PAN_KMOD_GROUP_ALLOW_PRIORITY_REALTIME)
109       flags |= PIPE_CONTEXT_PRIORITY_REALTIME;
110 
111    if (kmod_flags & PAN_KMOD_GROUP_ALLOW_PRIORITY_HIGH)
112       flags |= PIPE_CONTEXT_PRIORITY_HIGH;
113 
114    if (kmod_flags & PAN_KMOD_GROUP_ALLOW_PRIORITY_MEDIUM)
115       flags |= PIPE_CONTEXT_PRIORITY_MEDIUM;
116 
117    if (kmod_flags & PAN_KMOD_GROUP_ALLOW_PRIORITY_LOW)
118       flags |= PIPE_CONTEXT_PRIORITY_LOW;
119 
120    return flags;
121 }
122 
123 static int
panfrost_get_shader_param(struct pipe_screen * screen,enum pipe_shader_type shader,enum pipe_shader_cap param)124 panfrost_get_shader_param(struct pipe_screen *screen,
125                           enum pipe_shader_type shader,
126                           enum pipe_shader_cap param)
127 {
128    struct panfrost_device *dev = pan_device(screen);
129    bool is_nofp16 = dev->debug & PAN_DBG_NOFP16;
130 
131    switch (shader) {
132    case PIPE_SHADER_VERTEX:
133    case PIPE_SHADER_FRAGMENT:
134    case PIPE_SHADER_COMPUTE:
135       break;
136    default:
137       return 0;
138    }
139 
140    /* We only allow observable side effects (memory writes) in compute and
141     * fragment shaders. Side effects in the geometry pipeline cause
142     * trouble with IDVS and conflict with our transform feedback lowering.
143     */
144    bool allow_side_effects = (shader != PIPE_SHADER_VERTEX);
145 
146    switch (param) {
147    case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
148    case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
149    case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
150    case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
151       return 16384; /* arbitrary */
152 
153    case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
154       return 1024; /* arbitrary */
155 
156    case PIPE_SHADER_CAP_MAX_INPUTS:
157       /* Used as ABI on Midgard */
158       return 16;
159 
160    case PIPE_SHADER_CAP_MAX_OUTPUTS:
161       return shader == PIPE_SHADER_FRAGMENT ? 8 : PIPE_MAX_ATTRIBS;
162 
163    case PIPE_SHADER_CAP_MAX_TEMPS:
164       return 256; /* arbitrary */
165 
166    case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE:
167       return 16 * 1024 * sizeof(float);
168 
169    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
170       STATIC_ASSERT(PAN_MAX_CONST_BUFFERS < 0x100);
171       return PAN_MAX_CONST_BUFFERS;
172 
173    case PIPE_SHADER_CAP_CONT_SUPPORTED:
174       return 0;
175 
176    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
177       return dev->arch >= 6;
178 
179    case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
180       return 1;
181 
182    case PIPE_SHADER_CAP_SUBROUTINES:
183       return 0;
184 
185    case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
186       return 0;
187 
188    case PIPE_SHADER_CAP_INTEGERS:
189       return 1;
190 
191       /* The Bifrost compiler supports full 16-bit. Midgard could but int16
192        * support is untested, so restrict INT16 to Bifrost. Midgard
193        * architecturally cannot support fp16 derivatives. */
194 
195    case PIPE_SHADER_CAP_FP16:
196    case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
197       return !is_nofp16;
198    case PIPE_SHADER_CAP_FP16_DERIVATIVES:
199    case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
200       return dev->arch >= 6 && !is_nofp16;
201    case PIPE_SHADER_CAP_INT16:
202       /* Blocked on https://gitlab.freedesktop.org/mesa/mesa/-/issues/6075 */
203       return false;
204 
205    case PIPE_SHADER_CAP_INT64_ATOMICS:
206    case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
207       return 0;
208 
209    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
210       STATIC_ASSERT(PIPE_MAX_SAMPLERS < 0x10000);
211       return PIPE_MAX_SAMPLERS;
212 
213    case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
214       STATIC_ASSERT(PIPE_MAX_SHADER_SAMPLER_VIEWS < 0x10000);
215       return PIPE_MAX_SHADER_SAMPLER_VIEWS;
216 
217    case PIPE_SHADER_CAP_SUPPORTED_IRS:
218       return (1 << PIPE_SHADER_IR_NIR);
219 
220    case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
221       return allow_side_effects ? 16 : 0;
222 
223    case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
224       return allow_side_effects ? PIPE_MAX_SHADER_IMAGES : 0;
225 
226    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
227    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
228       return 0;
229 
230    default:
231       return 0;
232    }
233 
234    return 0;
235 }
236 
237 static uint32_t
pipe_to_pan_bind_flags(uint32_t pipe_bind_flags)238 pipe_to_pan_bind_flags(uint32_t pipe_bind_flags)
239 {
240    static_assert(PIPE_BIND_DEPTH_STENCIL == PAN_BIND_DEPTH_STENCIL, "");
241    static_assert(PIPE_BIND_RENDER_TARGET == PAN_BIND_RENDER_TARGET, "");
242    static_assert(PIPE_BIND_SAMPLER_VIEW == PAN_BIND_SAMPLER_VIEW, "");
243    static_assert(PIPE_BIND_VERTEX_BUFFER == PAN_BIND_VERTEX_BUFFER, "");
244 
245    return pipe_bind_flags & (PAN_BIND_DEPTH_STENCIL | PAN_BIND_RENDER_TARGET |
246                              PAN_BIND_VERTEX_BUFFER | PAN_BIND_SAMPLER_VIEW);
247 }
248 
249 /**
250  * Query format support for creating a texture, drawing surface, etc.
251  * \param format  the format to test
252  * \param type  one of PIPE_TEXTURE, PIPE_SURFACE
253  */
254 static bool
panfrost_is_format_supported(struct pipe_screen * screen,enum pipe_format format,enum pipe_texture_target target,unsigned sample_count,unsigned storage_sample_count,unsigned bind)255 panfrost_is_format_supported(struct pipe_screen *screen,
256                              enum pipe_format format,
257                              enum pipe_texture_target target,
258                              unsigned sample_count,
259                              unsigned storage_sample_count, unsigned bind)
260 {
261    struct panfrost_device *dev = pan_device(screen);
262 
263    /* MSAA 2x gets rounded up to 4x. MSAA 8x/16x only supported on v5+.
264     * TODO: debug MSAA 8x/16x */
265 
266    switch (sample_count) {
267    case 0:
268    case 1:
269    case 4:
270       break;
271    case 8:
272    case 16:
273       if (dev->debug & PAN_DBG_MSAA16)
274          break;
275       else
276          return false;
277    default:
278       return false;
279    }
280 
281    if (MAX2(sample_count, 1) != MAX2(storage_sample_count, 1))
282       return false;
283 
284    /* Z16 causes dEQP failures on t720 */
285    if (format == PIPE_FORMAT_Z16_UNORM && dev->arch <= 4)
286       return false;
287 
288    /* Check we support the format with the given bind */
289 
290    unsigned pan_bind_flags = pipe_to_pan_bind_flags(bind);
291    struct panfrost_format fmt = dev->formats[format];
292    unsigned fmt_bind_flags = fmt.bind;
293 
294    /* Also check that compressed texture formats are supported on this
295     * particular chip. They may not be depending on system integration
296     * differences. */
297 
298    bool supported =
299       !util_format_is_compressed(format) ||
300       panfrost_supports_compressed_format(dev, fmt.texfeat_bit);
301 
302    if (!supported)
303       return false;
304 
305    if (bind & PIPE_BIND_DEPTH_STENCIL) {
306       /* On panfrost, S8_UINT is actually stored as X8S8_UINT, which
307        * causes us headaches when we try to bind it as DEPTH_STENCIL;
308        * the gallium driver doesn't handle this correctly. So reject
309        * it for now.
310        */
311       switch (format) {
312       case PIPE_FORMAT_S8_UINT:
313          fmt_bind_flags &= ~PAN_BIND_DEPTH_STENCIL;
314          break;
315       default:
316          /* no other special handling required yet */
317          break;
318       }
319    }
320 
321    return MALI_EXTRACT_INDEX(fmt.hw) &&
322       ((pan_bind_flags & ~fmt_bind_flags) == 0);
323 }
324 
325 static void
panfrost_query_compression_rates(struct pipe_screen * screen,enum pipe_format format,int max,uint32_t * rates,int * count)326 panfrost_query_compression_rates(struct pipe_screen *screen,
327                                  enum pipe_format format, int max,
328                                  uint32_t *rates, int *count)
329 {
330    struct panfrost_device *dev = pan_device(screen);
331 
332    if (!dev->has_afrc) {
333       *count = 0;
334       return;
335    }
336 
337    *count = panfrost_afrc_query_rates(format, max, rates);
338 }
339 
340 /* We always support linear and tiled operations, both external and internal.
341  * We support AFBC for a subset of formats, and colourspace transform for a
342  * subset of those. */
343 
344 static void
panfrost_walk_dmabuf_modifiers(struct pipe_screen * screen,enum pipe_format format,int max,uint64_t * modifiers,unsigned int * external_only,int * out_count,uint64_t test_modifier,bool allow_afrc)345 panfrost_walk_dmabuf_modifiers(struct pipe_screen *screen,
346                                enum pipe_format format, int max,
347                                uint64_t *modifiers, unsigned int *external_only,
348                                int *out_count, uint64_t test_modifier, bool allow_afrc)
349 {
350    /* Query AFBC status */
351    struct panfrost_device *dev = pan_device(screen);
352    bool afbc =
353       dev->has_afbc && panfrost_format_supports_afbc(dev->arch, format);
354    bool ytr = panfrost_afbc_can_ytr(format);
355    bool tiled_afbc = panfrost_afbc_can_tile(dev->arch);
356    bool afrc = allow_afrc && dev->has_afrc && panfrost_format_supports_afrc(format);
357 
358    unsigned count = 0;
359 
360    for (unsigned i = 0; i < PAN_MODIFIER_COUNT; ++i) {
361       if (drm_is_afbc(pan_best_modifiers[i])) {
362          if (!afbc)
363             continue;
364 
365          if ((pan_best_modifiers[i] & AFBC_FORMAT_MOD_SPLIT) &&
366              !panfrost_afbc_can_split(dev->arch, format, pan_best_modifiers[i]))
367             continue;
368 
369          if ((pan_best_modifiers[i] & AFBC_FORMAT_MOD_YTR) && !ytr)
370             continue;
371 
372          if ((pan_best_modifiers[i] & AFBC_FORMAT_MOD_TILED) && !tiled_afbc)
373             continue;
374       }
375 
376       if (drm_is_afrc(pan_best_modifiers[i]) && !afrc)
377          continue;
378 
379       if (test_modifier != DRM_FORMAT_MOD_INVALID &&
380           test_modifier != pan_best_modifiers[i])
381          continue;
382 
383       if (max > (int)count) {
384          modifiers[count] = pan_best_modifiers[i];
385 
386          if (external_only)
387             external_only[count] = false;
388       }
389       count++;
390    }
391 
392    *out_count = count;
393 }
394 
395 static void
panfrost_query_dmabuf_modifiers(struct pipe_screen * screen,enum pipe_format format,int max,uint64_t * modifiers,unsigned int * external_only,int * out_count)396 panfrost_query_dmabuf_modifiers(struct pipe_screen *screen,
397                                 enum pipe_format format, int max,
398                                 uint64_t *modifiers,
399                                 unsigned int *external_only, int *out_count)
400 {
401    panfrost_walk_dmabuf_modifiers(screen, format, max, modifiers, external_only,
402                                   out_count, DRM_FORMAT_MOD_INVALID, true);
403 }
404 
405 static void
panfrost_query_compression_modifiers(struct pipe_screen * screen,enum pipe_format format,uint32_t rate,int max,uint64_t * modifiers,int * count)406 panfrost_query_compression_modifiers(struct pipe_screen *screen,
407                                      enum pipe_format format, uint32_t rate,
408                                      int max, uint64_t *modifiers, int *count)
409 {
410    struct panfrost_device *dev = pan_device(screen);
411 
412    if (rate == PIPE_COMPRESSION_FIXED_RATE_NONE)
413       /* no compression requested, return all non-afrc formats */
414       panfrost_walk_dmabuf_modifiers(screen, format, max, modifiers,
415                                      NULL, /* external_only */
416                                      count,
417                                      DRM_FORMAT_MOD_INVALID,
418                                      false /* disallow afrc */);
419    else if (dev->has_afrc)
420       *count = panfrost_afrc_get_modifiers(format, rate, max, modifiers);
421    else
422       *count = 0;  /* compression requested but not supported */
423 }
424 
425 static bool
panfrost_is_dmabuf_modifier_supported(struct pipe_screen * screen,uint64_t modifier,enum pipe_format format,bool * external_only)426 panfrost_is_dmabuf_modifier_supported(struct pipe_screen *screen,
427                                       uint64_t modifier,
428                                       enum pipe_format format,
429                                       bool *external_only)
430 {
431    uint64_t unused;
432    unsigned int uint_extern_only = 0;
433    int count;
434 
435    panfrost_walk_dmabuf_modifiers(screen, format, 1, &unused, &uint_extern_only,
436                                   &count, modifier, true);
437 
438    if (external_only)
439       *external_only = uint_extern_only ? true : false;
440 
441    return count > 0;
442 }
443 
444 static int
panfrost_get_compute_param(struct pipe_screen * pscreen,enum pipe_shader_ir ir_type,enum pipe_compute_cap param,void * ret)445 panfrost_get_compute_param(struct pipe_screen *pscreen,
446                            enum pipe_shader_ir ir_type,
447                            enum pipe_compute_cap param, void *ret)
448 {
449    struct panfrost_device *dev = pan_device(pscreen);
450    const char *const ir = "panfrost";
451 
452 #define RET(x)                                                                 \
453    do {                                                                        \
454       if (ret)                                                                 \
455          memcpy(ret, x, sizeof(x));                                            \
456       return sizeof(x);                                                        \
457    } while (0)
458 
459    switch (param) {
460    case PIPE_COMPUTE_CAP_ADDRESS_BITS:
461       RET((uint32_t[]){64});
462 
463    case PIPE_COMPUTE_CAP_IR_TARGET:
464       if (ret)
465          sprintf(ret, "%s", ir);
466       return strlen(ir) * sizeof(char);
467 
468    case PIPE_COMPUTE_CAP_GRID_DIMENSION:
469       RET((uint64_t[]){3});
470 
471    case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
472       RET(((uint64_t[]){65535, 65535, 65535}));
473 
474    case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
475       /* Unpredictable behaviour at larger sizes. Mali-G52 advertises
476        * 384x384x384.
477        *
478        * On Midgard, we don't allow more than 128 threads in each
479        * direction to match PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK.
480        * That still exceeds the minimum-maximum.
481        */
482       if (dev->arch >= 6)
483          RET(((uint64_t[]){256, 256, 256}));
484       else
485          RET(((uint64_t[]){128, 128, 128}));
486 
487    case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
488       /* On Bifrost and newer, all GPUs can support at least 256 threads
489        * regardless of register usage, so we report 256.
490        *
491        * On Midgard, with maximum register usage, the maximum
492        * thread count is only 64. We would like to report 64 here, but
493        * the GLES3.1 spec minimum is 128, so we report 128 and limit
494        * the register allocation of affected compute kernels.
495        */
496       RET((uint64_t[]){dev->arch >= 6 ? 256 : 128});
497 
498    case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
499    case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: {
500       uint64_t total_ram;
501 
502       if (!os_get_total_physical_memory(&total_ram))
503          return 0;
504 
505       /* We don't want to burn too much ram with the GPU. If the user has 4GiB
506        * or less, we use at most half. If they have more than 4GiB, we use 3/4.
507        */
508       uint64_t available_ram;
509       if (total_ram <= 4ull * 1024 * 1024 * 1024)
510          available_ram = total_ram / 2;
511       else
512          available_ram = total_ram * 3 / 4;
513 
514       /* 48bit address space max, with the lower 32MB reserved. We clamp
515        * things so it matches kmod VA range limitations.
516        */
517       uint64_t user_va_start =
518          panfrost_clamp_to_usable_va_range(dev->kmod.dev, PAN_VA_USER_START);
519       uint64_t user_va_end =
520          panfrost_clamp_to_usable_va_range(dev->kmod.dev, PAN_VA_USER_END);
521 
522       /* We cannot support more than the VA limit */
523       RET((uint64_t[]){MIN2(available_ram, user_va_end - user_va_start)});
524    }
525 
526    case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
527       RET((uint64_t[]){32768});
528 
529    case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
530    case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
531       RET((uint64_t[]){4096});
532 
533    case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
534       RET((uint32_t[]){800 /* MHz -- TODO */});
535 
536    case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
537       RET((uint32_t[]){dev->core_count});
538 
539    case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
540       RET((uint32_t[]){1});
541 
542    case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
543       RET((uint32_t[]){pan_subgroup_size(dev->arch)});
544 
545    case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
546       RET((uint32_t[]){0 /* TODO */});
547 
548    case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
549       RET((uint64_t[]){1024}); // TODO
550    }
551 
552    return 0;
553 }
554 
555 static void
panfrost_init_screen_caps(struct panfrost_screen * screen)556 panfrost_init_screen_caps(struct panfrost_screen *screen)
557 {
558    struct pipe_caps *caps = (struct pipe_caps *)&screen->base.caps;
559 
560    u_init_pipe_screen_caps(&screen->base, 1);
561 
562    struct panfrost_device *dev = &screen->dev;
563 
564    /* Our GL 3.x implementation is WIP */
565    bool is_gl3 = dev->debug & PAN_DBG_GL3;
566 
567    /* Native MRT is introduced with v5 */
568    bool has_mrt = (dev->arch >= 5);
569 
570    caps->npot_textures = true;
571    caps->mixed_color_depth_bits = true;
572    caps->fragment_shader_texture_lod = true;
573    caps->vertex_color_unclamped = true;
574    caps->depth_clip_disable = true;
575    caps->mixed_framebuffer_sizes = true;
576    caps->frontend_noop = true;
577    caps->sample_shading = true;
578    caps->fragment_shader_derivatives = true;
579    caps->framebuffer_no_attachment = true;
580    caps->quads_follow_provoking_vertex_convention = true;
581    caps->shader_pack_half_float = true;
582    caps->has_const_bw = true;
583 
584    /* Removed in v9 (Valhall) */
585    caps->depth_clip_disable_separate = dev->arch < 9;
586 
587    caps->max_render_targets =
588    caps->fbfetch = has_mrt ? 8 : 1;
589    caps->fbfetch_coherent = true;
590 
591    caps->max_dual_source_render_targets = 1;
592 
593    caps->occlusion_query = true;
594    caps->primitive_restart = true;
595    caps->primitive_restart_fixed_index = true;
596 
597    caps->anisotropic_filter =
598       panfrost_device_gpu_rev(dev) >= dev->model->min_rev_anisotropic;
599 
600    /* Compile side is done for Bifrost, Midgard TODO. Needs some kernel
601     * work to turn on, since CYCLE_COUNT_START needs to be issued. In
602     * kbase, userspace requests this via BASE_JD_REQ_PERMON. There is not
603     * yet way to request this with mainline TODO */
604    caps->shader_clock = false;
605 
606    caps->vs_instanceid = true;
607    caps->texture_multisample = true;
608    caps->surface_sample_count = true;
609 
610    caps->sampler_view_target = true;
611    caps->clip_halfz = true;
612    caps->polygon_offset_clamp = true;
613    caps->texture_swizzle = true;
614    caps->texture_mirror_clamp_to_edge = true;
615    caps->vertex_element_instance_divisor = true;
616    caps->blend_equation_separate = true;
617    caps->indep_blend_enable = true;
618    caps->indep_blend_func = true;
619    caps->generate_mipmap = true;
620    caps->uma = true;
621    caps->texture_float_linear = true;
622    caps->texture_half_float_linear = true;
623    caps->shader_array_components = true;
624    caps->texture_buffer_objects = true;
625    caps->packed_uniforms = true;
626    caps->image_load_formatted = true;
627    caps->cube_map_array = true;
628    caps->compute = true;
629    caps->int64 = true;
630 
631    caps->copy_between_compressed_and_plain_formats = true;
632 
633    caps->max_stream_output_buffers = PIPE_MAX_SO_BUFFERS;
634 
635    caps->max_stream_output_separate_components =
636    caps->max_stream_output_interleaved_components = PIPE_MAX_SO_OUTPUTS;
637 
638    caps->stream_output_pause_resume = true;
639    caps->stream_output_interleave_buffers = true;
640 
641    caps->max_texture_array_layers = 2048;
642 
643    caps->glsl_feature_level =
644    caps->glsl_feature_level_compatibility = is_gl3 ? 330 : 140;
645    caps->essl_feature_level = dev->arch >= 6 ? 320 : 310;
646 
647    caps->constant_buffer_offset_alignment = 16;
648 
649    /* v7 (only) restricts component orders with AFBC. To workaround, we
650     * compose format swizzles with texture swizzles. pan_texture.c motsly
651     * handles this but we need to fix up the border colour.
652     */
653    caps->texture_border_color_quirk = dev->arch == 7 || dev->arch >= 10 ?
654       PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_FREEDRENO : 0;
655 
656    caps->max_texel_buffer_elements = PAN_MAX_TEXEL_BUFFER_ELEMENTS;
657 
658    /* Must be at least 64 for correct behaviour */
659    caps->texture_buffer_offset_alignment = 64;
660 
661    caps->query_time_elapsed =
662    caps->query_timestamp =
663       dev->kmod.props.gpu_can_query_timestamp &&
664       dev->kmod.props.timestamp_frequency != 0;
665 
666    caps->timer_resolution = pan_gpu_time_to_ns(dev, 1);
667 
668    /* The hardware requires element alignment for data conversion to work
669     * as expected. If data conversion is not required, this restriction is
670     * lifted on Midgard at a performance penalty. We conservatively
671     * require element alignment for vertex buffers, using u_vbuf to
672     * translate to match the hardware requirement.
673     *
674     * This is less heavy-handed than PIPE_VERTEX_INPUT_ALIGNMENT_4BYTE, which
675     * would needlessly require alignment even for 8-bit formats.
676     */
677    caps->vertex_input_alignment = PIPE_VERTEX_INPUT_ALIGNMENT_ELEMENT;
678 
679    caps->max_texture_2d_size = 1 << (PAN_MAX_MIP_LEVELS - 1);
680 
681    caps->max_texture_3d_levels =
682    caps->max_texture_cube_levels = PAN_MAX_MIP_LEVELS;
683 
684    /* pixel coord is in integer sysval on bifrost. */
685    caps->fs_coord_pixel_center_integer = dev->arch >= 6;
686    caps->fs_coord_pixel_center_half_integer = dev->arch < 6;
687 
688    /* Hardware is upper left */
689    caps->fs_coord_origin_lower_left = false;
690 
691    caps->fs_coord_origin_upper_left = true;
692    caps->tgsi_texcoord = true;
693 
694    /* We would prefer varyings on Midgard, but proper sysvals on Bifrost */
695    caps->fs_face_is_integer_sysval =
696    caps->fs_position_is_sysval =
697    caps->fs_point_is_sysval = dev->arch >= 6;
698 
699    caps->seamless_cube_map = true;
700    caps->seamless_cube_map_per_texture = true;
701 
702    caps->max_vertex_element_src_offset = 0xffff;
703 
704    caps->texture_transfer_modes = 0;
705 
706    caps->endianness = PIPE_ENDIAN_NATIVE;
707 
708    caps->max_texture_gather_components = 4;
709 
710    caps->min_texture_gather_offset = -8;
711 
712    caps->max_texture_gather_offset = 7;
713 
714    uint64_t system_memory;
715    caps->video_memory = os_get_total_physical_memory(&system_memory) ?
716       system_memory >> 20 : 0;
717 
718    caps->shader_stencil_export = true;
719    caps->conditional_render = true;
720    caps->conditional_render_inverted = true;
721 
722    caps->shader_buffer_offset_alignment = 4;
723 
724    caps->max_varyings = dev->arch >= 9 ? 16 : 32;
725 
726    /* Removed in v6 (Bifrost) */
727    caps->gl_clamp =
728    caps->texture_mirror_clamp =
729    caps->alpha_test = dev->arch <= 5;
730 
731    /* Removed in v9 (Valhall). PRIMTIIVE_RESTART_FIXED_INDEX is of course
732     * still supported as it is core GLES3.0 functionality
733     */
734    caps->emulate_nonfixed_primitive_restart = dev->arch >= 9;
735 
736    caps->flatshade = false;
737    caps->two_sided_color = false;
738    caps->clip_planes = 0;
739 
740    caps->packed_stream_output = false;
741 
742    caps->viewport_transform_lowered = true;
743    caps->psiz_clamped = true;
744 
745    caps->nir_images_as_deref = false;
746 
747    caps->draw_indirect = true;
748 
749    caps->multi_draw_indirect = dev->arch >= 10;
750 
751    caps->start_instance =
752    caps->draw_parameters = pan_is_bifrost(dev);
753 
754    /* Mali supports GLES and QUADS. Midgard and v6 Bifrost
755     * support more */
756    uint32_t modes = BITFIELD_MASK(MESA_PRIM_QUADS + 1);
757 
758    if (dev->arch <= 6) {
759       modes |= BITFIELD_BIT(MESA_PRIM_QUAD_STRIP);
760       modes |= BITFIELD_BIT(MESA_PRIM_POLYGON);
761    }
762 
763    if (dev->arch >= 9) {
764       /* Although Valhall is supposed to support quads, they
765        * don't seem to work correctly. Disable to fix
766        * arb-provoking-vertex-render.
767        */
768       modes &= ~BITFIELD_BIT(MESA_PRIM_QUADS);
769    }
770 
771    caps->supported_prim_modes =
772    caps->supported_prim_modes_with_restart = modes;
773 
774    caps->image_store_formatted = true;
775 
776    caps->native_fence_fd = true;
777 
778    caps->context_priority_mask =
779       from_kmod_group_allow_priority_flags(
780          dev->kmod.props.allowed_group_priorities_mask);
781 
782    caps->astc_decode_mode = dev->arch >= 9 && (dev->compressed_formats & (1 << 30));
783 
784    caps->min_line_width =
785    caps->min_line_width_aa =
786    caps->min_point_size =
787    caps->min_point_size_aa = 1;
788 
789    caps->point_size_granularity =
790    caps->line_width_granularity = 0.0625;
791 
792    caps->max_line_width =
793    caps->max_line_width_aa =
794    caps->max_point_size =
795    caps->max_point_size_aa = 4095.9375;
796 
797    caps->max_texture_anisotropy = 16.0;
798 
799    caps->max_texture_lod_bias = 16.0; /* arbitrary */
800 }
801 
802 static void
panfrost_destroy_screen(struct pipe_screen * pscreen)803 panfrost_destroy_screen(struct pipe_screen *pscreen)
804 {
805    struct panfrost_device *dev = pan_device(pscreen);
806    struct panfrost_screen *screen = pan_screen(pscreen);
807 
808    panfrost_resource_screen_destroy(pscreen);
809    panfrost_pool_cleanup(&screen->mempools.bin);
810    panfrost_pool_cleanup(&screen->mempools.desc);
811    pan_blend_shader_cache_cleanup(&dev->blend_shaders);
812 
813    if (screen->vtbl.screen_destroy)
814       screen->vtbl.screen_destroy(pscreen);
815 
816    if (dev->ro)
817       dev->ro->destroy(dev->ro);
818    panfrost_close_device(dev);
819 
820    disk_cache_destroy(screen->disk_cache);
821    ralloc_free(pscreen);
822 }
823 
824 static const void *
panfrost_screen_get_compiler_options(struct pipe_screen * pscreen,enum pipe_shader_ir ir,enum pipe_shader_type shader)825 panfrost_screen_get_compiler_options(struct pipe_screen *pscreen,
826                                      enum pipe_shader_ir ir,
827                                      enum pipe_shader_type shader)
828 {
829    return pan_screen(pscreen)->vtbl.get_compiler_options();
830 }
831 
832 static struct disk_cache *
panfrost_get_disk_shader_cache(struct pipe_screen * pscreen)833 panfrost_get_disk_shader_cache(struct pipe_screen *pscreen)
834 {
835    return pan_screen(pscreen)->disk_cache;
836 }
837 
/* Return the file descriptor of the device backing this screen. */
static int
panfrost_get_screen_fd(struct pipe_screen *pscreen)
{
   struct panfrost_device *dev = pan_device(pscreen);

   return panfrost_device_fd(dev);
}
843 
844 int
panfrost_get_driver_query_info(struct pipe_screen * pscreen,unsigned index,struct pipe_driver_query_info * info)845 panfrost_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
846                                struct pipe_driver_query_info *info)
847 {
848    int num_queries = ARRAY_SIZE(panfrost_driver_query_list);
849 
850    if (!info)
851       return num_queries;
852 
853    if (index >= num_queries)
854       return 0;
855 
856    *info = panfrost_driver_query_list[index];
857 
858    return 1;
859 }
860 
861 static uint64_t
panfrost_get_timestamp(struct pipe_screen * pscreen)862 panfrost_get_timestamp(struct pipe_screen *pscreen)
863 {
864    struct panfrost_device *dev = pan_device(pscreen);
865 
866    return pan_gpu_time_to_ns(dev, pan_kmod_query_timestamp(dev->kmod.dev));
867 }
868 
869 struct pipe_screen *
panfrost_create_screen(int fd,const struct pipe_screen_config * config,struct renderonly * ro)870 panfrost_create_screen(int fd, const struct pipe_screen_config *config,
871                        struct renderonly *ro)
872 {
873    /* Create the screen */
874    struct panfrost_screen *screen = rzalloc(NULL, struct panfrost_screen);
875 
876    if (!screen)
877       return NULL;
878 
879    struct panfrost_device *dev = pan_device(&screen->base);
880 
881    driParseConfigFiles(config->options, config->options_info, 0,
882                        "panfrost", NULL, NULL, NULL, 0, NULL, 0);
883 
884    /* Debug must be set first for pandecode to work correctly */
885    dev->debug =
886       debug_get_flags_option("PAN_MESA_DEBUG", panfrost_debug_options, 0);
887    screen->max_afbc_packing_ratio = debug_get_num_option(
888       "PAN_MAX_AFBC_PACKING_RATIO", DEFAULT_MAX_AFBC_PACKING_RATIO);
889 
890    if (panfrost_open_device(screen, fd, dev)) {
891       ralloc_free(screen);
892       return NULL;
893    }
894 
895    if (dev->debug & PAN_DBG_NO_AFBC)
896       dev->has_afbc = false;
897 
898    /* Bail early on unsupported hardware */
899    if (dev->model == NULL) {
900       debug_printf("panfrost: Unsupported model %X",
901                    panfrost_device_gpu_id(dev));
902       panfrost_destroy_screen(&(screen->base));
903       return NULL;
904    }
905 
906    screen->force_afbc_packing = dev->debug & PAN_DBG_FORCE_PACK;
907    if (!screen->force_afbc_packing)
908       screen->force_afbc_packing = driQueryOptionb(config->options,
909                                                    "pan_force_afbc_packing");
910 
911    const char *option = debug_get_option("PAN_AFRC_RATE", NULL);
912    if (!option) {
913       screen->force_afrc_rate = -1;
914    } else if (strcmp(option, "default") == 0) {
915       screen->force_afrc_rate = PIPE_COMPRESSION_FIXED_RATE_DEFAULT;
916    } else {
917       int64_t rate =
918          debug_parse_num_option(option, PIPE_COMPRESSION_FIXED_RATE_NONE);
919       screen->force_afrc_rate = rate;
920    }
921 
922    screen->csf_tiler_heap.chunk_size = driQueryOptioni(config->options,
923                                                        "pan_csf_chunk_size");
924    screen->csf_tiler_heap.initial_chunks = driQueryOptioni(config->options,
925                                                            "pan_csf_initial_chunks");
926    screen->csf_tiler_heap.max_chunks = driQueryOptioni(config->options,
927                                                        "pan_csf_max_chunks");
928 
929    dev->ro = ro;
930 
931    screen->base.destroy = panfrost_destroy_screen;
932 
933    screen->base.get_screen_fd = panfrost_get_screen_fd;
934    screen->base.get_name = panfrost_get_name;
935    screen->base.get_vendor = panfrost_get_vendor;
936    screen->base.get_device_vendor = panfrost_get_device_vendor;
937    screen->base.get_driver_query_info = panfrost_get_driver_query_info;
938    screen->base.get_shader_param = panfrost_get_shader_param;
939    screen->base.get_compute_param = panfrost_get_compute_param;
940    screen->base.get_timestamp = panfrost_get_timestamp;
941    screen->base.is_format_supported = panfrost_is_format_supported;
942    screen->base.query_dmabuf_modifiers = panfrost_query_dmabuf_modifiers;
943    screen->base.is_dmabuf_modifier_supported =
944       panfrost_is_dmabuf_modifier_supported;
945    screen->base.context_create = panfrost_create_context;
946    screen->base.get_compiler_options = panfrost_screen_get_compiler_options;
947    screen->base.get_disk_shader_cache = panfrost_get_disk_shader_cache;
948    screen->base.fence_reference = panfrost_fence_reference;
949    screen->base.fence_finish = panfrost_fence_finish;
950    screen->base.fence_get_fd = panfrost_fence_get_fd;
951    screen->base.set_damage_region = panfrost_resource_set_damage_region;
952    screen->base.query_compression_rates = panfrost_query_compression_rates;
953    screen->base.query_compression_modifiers =
954       panfrost_query_compression_modifiers;
955 
956    panfrost_resource_screen_init(&screen->base);
957    pan_blend_shader_cache_init(&dev->blend_shaders,
958                                panfrost_device_gpu_id(dev));
959 
960    panfrost_init_screen_caps(screen);
961 
962    panfrost_disk_cache_init(screen);
963 
964    if (panfrost_pool_init(&screen->mempools.bin, NULL, dev, PAN_BO_EXECUTE,
965                           4096, "Preload shaders", false, true) ||
966        panfrost_pool_init(&screen->mempools.desc, NULL, dev, 0, 65536,
967                           "Preload RSDs", false, true)) {
968       panfrost_destroy_screen(&(screen->base));
969       return NULL;
970    }
971 
972    if (dev->arch == 4)
973       panfrost_cmdstream_screen_init_v4(screen);
974    else if (dev->arch == 5)
975       panfrost_cmdstream_screen_init_v5(screen);
976    else if (dev->arch == 6)
977       panfrost_cmdstream_screen_init_v6(screen);
978    else if (dev->arch == 7)
979       panfrost_cmdstream_screen_init_v7(screen);
980    else if (dev->arch == 9)
981       panfrost_cmdstream_screen_init_v9(screen);
982    else if (dev->arch == 10)
983       panfrost_cmdstream_screen_init_v10(screen);
984    else
985       unreachable("Unhandled architecture major");
986 
987    return &screen->base;
988 }
989