1 /*
2 * Copyright (C) 2008 VMware, Inc.
3 * Copyright (C) 2014 Broadcom
4 * Copyright (C) 2018 Alyssa Rosenzweig
5 * Copyright (C) 2019 Collabora, Ltd.
6 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 * SOFTWARE.
26 *
27 */
28
29 #include "draw/draw_context.h"
30 #include "pipe/p_defines.h"
31 #include "pipe/p_screen.h"
32 #include "util/format/u_format.h"
33 #include "util/format/u_format_s3tc.h"
34 #include "util/os_time.h"
35 #include "util/u_debug.h"
36 #include "util/u_memory.h"
37 #include "util/u_process.h"
38 #include "util/u_screen.h"
39 #include "util/u_video.h"
40 #include "util/xmlconfig.h"
41
42 #include <fcntl.h>
43
44 #include "drm-uapi/drm_fourcc.h"
45 #include "drm-uapi/panfrost_drm.h"
46
47 #include "decode.h"
48 #include "pan_bo.h"
49 #include "pan_fence.h"
50 #include "pan_public.h"
51 #include "pan_resource.h"
52 #include "pan_screen.h"
53 #include "pan_shader.h"
54 #include "pan_texture.h"
55 #include "pan_util.h"
56
57 #include "pan_context.h"
58
59 #define DEFAULT_MAX_AFBC_PACKING_RATIO 90
60
61 /* clang-format off */
62 static const struct debug_named_value panfrost_debug_options[] = {
63 {"perf", PAN_DBG_PERF, "Enable performance warnings"},
64 {"trace", PAN_DBG_TRACE, "Trace the command stream"},
65 {"dirty", PAN_DBG_DIRTY, "Always re-emit all state"},
66 {"sync", PAN_DBG_SYNC, "Wait for each job's completion and abort on GPU faults"},
67 {"nofp16", PAN_DBG_NOFP16, "Disable 16-bit support"},
68 {"gl3", PAN_DBG_GL3, "Enable experimental GL 3.x implementation, up to 3.3"},
69 {"noafbc", PAN_DBG_NO_AFBC, "Disable AFBC support"},
70 {"nocrc", PAN_DBG_NO_CRC, "Disable transaction elimination"},
71 {"msaa16", PAN_DBG_MSAA16, "Enable MSAA 8x and 16x support"},
72 {"linear", PAN_DBG_LINEAR, "Force linear textures"},
73 {"nocache", PAN_DBG_NO_CACHE, "Disable BO cache"},
74 {"dump", PAN_DBG_DUMP, "Dump all graphics memory"},
75 #ifdef PAN_DBG_OVERFLOW
76 {"overflow", PAN_DBG_OVERFLOW, "Check for buffer overflows in pool uploads"},
77 #endif
78 {"yuv", PAN_DBG_YUV, "Tint YUV textures with blue for 1-plane and green for 2-plane"},
79 {"forcepack", PAN_DBG_FORCE_PACK, "Force packing of AFBC textures on upload"},
80 {"cs", PAN_DBG_CS, "Enable extra checks in command stream"},
81 DEBUG_NAMED_VALUE_END
82 };
83 /* clang-format on */
84
85 static const char *
panfrost_get_name(struct pipe_screen * screen)86 panfrost_get_name(struct pipe_screen *screen)
87 {
88 return pan_device(screen)->model->name;
89 }
90
/* pipe_screen::get_vendor hook: the driver vendor string is constant and
 * does not depend on the screen. */
static const char *
panfrost_get_vendor(struct pipe_screen *screen)
{
   static const char driver_vendor[] = "Mesa";

   return driver_vendor;
}
96
/* pipe_screen::get_device_vendor hook: the hardware vendor string is
 * constant and does not depend on the screen. */
static const char *
panfrost_get_device_vendor(struct pipe_screen *screen)
{
   static const char hardware_vendor[] = "Arm";

   return hardware_vendor;
}
102
103 static int
from_kmod_group_allow_priority_flags(enum pan_kmod_group_allow_priority_flags kmod_flags)104 from_kmod_group_allow_priority_flags(
105 enum pan_kmod_group_allow_priority_flags kmod_flags)
106 {
107 int flags = 0;
108
109 if (kmod_flags & PAN_KMOD_GROUP_ALLOW_PRIORITY_REALTIME)
110 flags |= PIPE_CONTEXT_PRIORITY_REALTIME;
111
112 if (kmod_flags & PAN_KMOD_GROUP_ALLOW_PRIORITY_HIGH)
113 flags |= PIPE_CONTEXT_PRIORITY_HIGH;
114
115 if (kmod_flags & PAN_KMOD_GROUP_ALLOW_PRIORITY_MEDIUM)
116 flags |= PIPE_CONTEXT_PRIORITY_MEDIUM;
117
118 if (kmod_flags & PAN_KMOD_GROUP_ALLOW_PRIORITY_LOW)
119 flags |= PIPE_CONTEXT_PRIORITY_LOW;
120
121 return flags;
122 }
123
124 static int
panfrost_get_shader_param(struct pipe_screen * screen,enum pipe_shader_type shader,enum pipe_shader_cap param)125 panfrost_get_shader_param(struct pipe_screen *screen,
126 enum pipe_shader_type shader,
127 enum pipe_shader_cap param)
128 {
129 struct panfrost_device *dev = pan_device(screen);
130 bool is_nofp16 = dev->debug & PAN_DBG_NOFP16;
131
132 switch (shader) {
133 case PIPE_SHADER_VERTEX:
134 case PIPE_SHADER_FRAGMENT:
135 case PIPE_SHADER_COMPUTE:
136 break;
137 default:
138 return 0;
139 }
140
141 /* We only allow observable side effects (memory writes) in compute and
142 * fragment shaders. Side effects in the geometry pipeline cause
143 * trouble with IDVS and conflict with our transform feedback lowering.
144 */
145 bool allow_side_effects = (shader != PIPE_SHADER_VERTEX);
146
147 switch (param) {
148 case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
149 case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
150 case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
151 case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
152 return 16384; /* arbitrary */
153
154 case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
155 return 1024; /* arbitrary */
156
157 case PIPE_SHADER_CAP_MAX_INPUTS:
158 /* Used as ABI on Midgard */
159 return 16;
160
161 case PIPE_SHADER_CAP_MAX_OUTPUTS:
162 return shader == PIPE_SHADER_FRAGMENT ? 8 : PIPE_MAX_ATTRIBS;
163
164 case PIPE_SHADER_CAP_MAX_TEMPS:
165 return 256; /* arbitrary */
166
167 case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE:
168 return 16 * 1024 * sizeof(float);
169
170 case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
171 STATIC_ASSERT(PAN_MAX_CONST_BUFFERS < 0x100);
172 return PAN_MAX_CONST_BUFFERS;
173
174 case PIPE_SHADER_CAP_CONT_SUPPORTED:
175 return 0;
176
177 case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
178 return dev->arch >= 6;
179
180 case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
181 return 1;
182
183 case PIPE_SHADER_CAP_SUBROUTINES:
184 return 0;
185
186 case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
187 return 0;
188
189 case PIPE_SHADER_CAP_INTEGERS:
190 return 1;
191
192 /* The Bifrost compiler supports full 16-bit. Midgard could but int16
193 * support is untested, so restrict INT16 to Bifrost. Midgard
194 * architecturally cannot support fp16 derivatives. */
195
196 case PIPE_SHADER_CAP_FP16:
197 case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
198 return !is_nofp16;
199 case PIPE_SHADER_CAP_FP16_DERIVATIVES:
200 case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
201 return dev->arch >= 6 && !is_nofp16;
202 case PIPE_SHADER_CAP_INT16:
203 /* Blocked on https://gitlab.freedesktop.org/mesa/mesa/-/issues/6075 */
204 return false;
205
206 case PIPE_SHADER_CAP_INT64_ATOMICS:
207 case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
208 return 0;
209
210 case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
211 STATIC_ASSERT(PIPE_MAX_SAMPLERS < 0x10000);
212 return PIPE_MAX_SAMPLERS;
213
214 case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
215 STATIC_ASSERT(PIPE_MAX_SHADER_SAMPLER_VIEWS < 0x10000);
216 return PIPE_MAX_SHADER_SAMPLER_VIEWS;
217
218 case PIPE_SHADER_CAP_SUPPORTED_IRS:
219 return (1 << PIPE_SHADER_IR_NIR);
220
221 case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
222 return allow_side_effects ? 16 : 0;
223
224 case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
225 return allow_side_effects ? PIPE_MAX_SHADER_IMAGES : 0;
226
227 case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
228 case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
229 return 0;
230
231 default:
232 return 0;
233 }
234
235 return 0;
236 }
237
238 static uint32_t
pipe_to_pan_bind_flags(uint32_t pipe_bind_flags)239 pipe_to_pan_bind_flags(uint32_t pipe_bind_flags)
240 {
241 static_assert(PIPE_BIND_DEPTH_STENCIL == PAN_BIND_DEPTH_STENCIL, "");
242 static_assert(PIPE_BIND_RENDER_TARGET == PAN_BIND_RENDER_TARGET, "");
243 static_assert(PIPE_BIND_SAMPLER_VIEW == PAN_BIND_SAMPLER_VIEW, "");
244 static_assert(PIPE_BIND_VERTEX_BUFFER == PAN_BIND_VERTEX_BUFFER, "");
245
246 return pipe_bind_flags & (PAN_BIND_DEPTH_STENCIL | PAN_BIND_RENDER_TARGET |
247 PAN_BIND_VERTEX_BUFFER | PAN_BIND_SAMPLER_VIEW);
248 }
249
250 /**
251 * Query format support for creating a texture, drawing surface, etc.
252 * \param format the format to test
253 * \param type one of PIPE_TEXTURE, PIPE_SURFACE
254 */
255 static bool
panfrost_is_format_supported(struct pipe_screen * screen,enum pipe_format format,enum pipe_texture_target target,unsigned sample_count,unsigned storage_sample_count,unsigned bind)256 panfrost_is_format_supported(struct pipe_screen *screen,
257 enum pipe_format format,
258 enum pipe_texture_target target,
259 unsigned sample_count,
260 unsigned storage_sample_count, unsigned bind)
261 {
262 struct panfrost_device *dev = pan_device(screen);
263
264 /* MSAA 2x gets rounded up to 4x. MSAA 8x/16x only supported on v5+.
265 * TODO: debug MSAA 8x/16x */
266
267 switch (sample_count) {
268 case 0:
269 case 1:
270 case 4:
271 break;
272 case 8:
273 case 16:
274 if (dev->debug & PAN_DBG_MSAA16)
275 break;
276 else
277 return false;
278 default:
279 return false;
280 }
281
282 if (MAX2(sample_count, 1) != MAX2(storage_sample_count, 1))
283 return false;
284
285 /* Z16 causes dEQP failures on t720 */
286 if (format == PIPE_FORMAT_Z16_UNORM && dev->arch <= 4)
287 return false;
288
289 /* Check we support the format with the given bind */
290
291 unsigned pan_bind_flags = pipe_to_pan_bind_flags(bind);
292 struct panfrost_format fmt = dev->formats[format];
293 unsigned fmt_bind_flags = fmt.bind;
294
295 /* Also check that compressed texture formats are supported on this
296 * particular chip. They may not be depending on system integration
297 * differences. */
298
299 bool supported =
300 !util_format_is_compressed(format) ||
301 panfrost_supports_compressed_format(dev, fmt.texfeat_bit);
302
303 if (!supported)
304 return false;
305
306 if (bind & PIPE_BIND_DEPTH_STENCIL) {
307 /* On panfrost, S8_UINT is actually stored as X8S8_UINT, which
308 * causes us headaches when we try to bind it as DEPTH_STENCIL;
309 * the gallium driver doesn't handle this correctly. So reject
310 * it for now.
311 */
312 switch (format) {
313 case PIPE_FORMAT_S8_UINT:
314 fmt_bind_flags &= ~PAN_BIND_DEPTH_STENCIL;
315 break;
316 default:
317 /* no other special handling required yet */
318 break;
319 }
320 }
321
322 return MALI_EXTRACT_INDEX(fmt.hw) &&
323 ((pan_bind_flags & ~fmt_bind_flags) == 0);
324 }
325
326 static void
panfrost_query_compression_rates(struct pipe_screen * screen,enum pipe_format format,int max,uint32_t * rates,int * count)327 panfrost_query_compression_rates(struct pipe_screen *screen,
328 enum pipe_format format, int max,
329 uint32_t *rates, int *count)
330 {
331 struct panfrost_device *dev = pan_device(screen);
332
333 if (!dev->has_afrc) {
334 *count = 0;
335 return;
336 }
337
338 *count = panfrost_afrc_query_rates(format, max, rates);
339 }
340
341 /* We always support linear and tiled operations, both external and internal.
342 * We support AFBC for a subset of formats, and colourspace transform for a
343 * subset of those. */
344
345 static void
panfrost_walk_dmabuf_modifiers(struct pipe_screen * screen,enum pipe_format format,int max,uint64_t * modifiers,unsigned int * external_only,int * out_count,uint64_t test_modifier,bool allow_afrc)346 panfrost_walk_dmabuf_modifiers(struct pipe_screen *screen,
347 enum pipe_format format, int max,
348 uint64_t *modifiers, unsigned int *external_only,
349 int *out_count, uint64_t test_modifier, bool allow_afrc)
350 {
351 /* Query AFBC status */
352 struct panfrost_device *dev = pan_device(screen);
353 bool afbc =
354 dev->has_afbc && panfrost_format_supports_afbc(dev->arch, format);
355 bool ytr = panfrost_afbc_can_ytr(format);
356 bool tiled_afbc = panfrost_afbc_can_tile(dev->arch);
357 bool afrc = allow_afrc && dev->has_afrc && panfrost_format_supports_afrc(format);
358
359 unsigned count = 0;
360
361 for (unsigned i = 0; i < PAN_MODIFIER_COUNT; ++i) {
362 if (drm_is_afbc(pan_best_modifiers[i])) {
363 if (!afbc)
364 continue;
365
366 if ((pan_best_modifiers[i] & AFBC_FORMAT_MOD_SPLIT) &&
367 !panfrost_afbc_can_split(dev->arch, format, pan_best_modifiers[i]))
368 continue;
369
370 if ((pan_best_modifiers[i] & AFBC_FORMAT_MOD_YTR) && !ytr)
371 continue;
372
373 if ((pan_best_modifiers[i] & AFBC_FORMAT_MOD_TILED) && !tiled_afbc)
374 continue;
375 }
376
377 if (drm_is_afrc(pan_best_modifiers[i]) && !afrc)
378 continue;
379
380 if (drm_is_mtk_tiled(format, pan_best_modifiers[i]) &&
381 !panfrost_format_supports_mtk_tiled(format))
382 continue;
383
384 if (test_modifier != DRM_FORMAT_MOD_INVALID &&
385 test_modifier != pan_best_modifiers[i])
386 continue;
387
388 if (max > (int)count) {
389 modifiers[count] = pan_best_modifiers[i];
390
391 if (external_only)
392 external_only[count] = drm_is_mtk_tiled(format, modifiers[count]);
393 }
394 count++;
395 }
396
397 *out_count = count;
398 }
399
400 static void
panfrost_query_dmabuf_modifiers(struct pipe_screen * screen,enum pipe_format format,int max,uint64_t * modifiers,unsigned int * external_only,int * out_count)401 panfrost_query_dmabuf_modifiers(struct pipe_screen *screen,
402 enum pipe_format format, int max,
403 uint64_t *modifiers,
404 unsigned int *external_only, int *out_count)
405 {
406 panfrost_walk_dmabuf_modifiers(screen, format, max, modifiers, external_only,
407 out_count, DRM_FORMAT_MOD_INVALID, true);
408 }
409
410 static void
panfrost_query_compression_modifiers(struct pipe_screen * screen,enum pipe_format format,uint32_t rate,int max,uint64_t * modifiers,int * count)411 panfrost_query_compression_modifiers(struct pipe_screen *screen,
412 enum pipe_format format, uint32_t rate,
413 int max, uint64_t *modifiers, int *count)
414 {
415 struct panfrost_device *dev = pan_device(screen);
416
417 if (rate == PIPE_COMPRESSION_FIXED_RATE_NONE)
418 /* no compression requested, return all non-afrc formats */
419 panfrost_walk_dmabuf_modifiers(screen, format, max, modifiers,
420 NULL, /* external_only */
421 count,
422 DRM_FORMAT_MOD_INVALID,
423 false /* disallow afrc */);
424 else if (dev->has_afrc)
425 *count = panfrost_afrc_get_modifiers(format, rate, max, modifiers);
426 else
427 *count = 0; /* compression requested but not supported */
428 }
429
430 static bool
panfrost_is_dmabuf_modifier_supported(struct pipe_screen * screen,uint64_t modifier,enum pipe_format format,bool * external_only)431 panfrost_is_dmabuf_modifier_supported(struct pipe_screen *screen,
432 uint64_t modifier,
433 enum pipe_format format,
434 bool *external_only)
435 {
436 uint64_t unused;
437 unsigned int uint_extern_only = 0;
438 int count;
439
440 panfrost_walk_dmabuf_modifiers(screen, format, 1, &unused, &uint_extern_only,
441 &count, modifier, true);
442
443 if (external_only)
444 *external_only = uint_extern_only ? true : false;
445
446 return count > 0;
447 }
448
449 static int
panfrost_get_compute_param(struct pipe_screen * pscreen,enum pipe_shader_ir ir_type,enum pipe_compute_cap param,void * ret)450 panfrost_get_compute_param(struct pipe_screen *pscreen,
451 enum pipe_shader_ir ir_type,
452 enum pipe_compute_cap param, void *ret)
453 {
454 struct panfrost_device *dev = pan_device(pscreen);
455 const char *const ir = "panfrost";
456
457 #define RET(x) \
458 do { \
459 if (ret) \
460 memcpy(ret, x, sizeof(x)); \
461 return sizeof(x); \
462 } while (0)
463
464 switch (param) {
465 case PIPE_COMPUTE_CAP_ADDRESS_BITS:
466 RET((uint32_t[]){64});
467
468 case PIPE_COMPUTE_CAP_IR_TARGET:
469 if (ret)
470 sprintf(ret, "%s", ir);
471 return strlen(ir) * sizeof(char);
472
473 case PIPE_COMPUTE_CAP_GRID_DIMENSION:
474 RET((uint64_t[]){3});
475
476 case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
477 RET(((uint64_t[]){65535, 65535, 65535}));
478
479 case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
480 /* Unpredictable behaviour at larger sizes. Mali-G52 advertises
481 * 384x384x384.
482 *
483 * On Midgard, we don't allow more than 128 threads in each
484 * direction to match PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK.
485 * That still exceeds the minimum-maximum.
486 */
487 if (dev->arch >= 6)
488 RET(((uint64_t[]){256, 256, 256}));
489 else
490 RET(((uint64_t[]){128, 128, 128}));
491
492 case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
493 /* On Bifrost and newer, all GPUs can support at least 256 threads
494 * regardless of register usage, so we report 256.
495 *
496 * On Midgard, with maximum register usage, the maximum
497 * thread count is only 64. We would like to report 64 here, but
498 * the GLES3.1 spec minimum is 128, so we report 128 and limit
499 * the register allocation of affected compute kernels.
500 */
501 RET((uint64_t[]){dev->arch >= 6 ? 256 : 128});
502
503 case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
504 case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: {
505 uint64_t total_ram;
506
507 if (!os_get_total_physical_memory(&total_ram))
508 return 0;
509
510 /* We don't want to burn too much ram with the GPU. If the user has 4GiB
511 * or less, we use at most half. If they have more than 4GiB, we use 3/4.
512 */
513 uint64_t available_ram;
514 if (total_ram <= 4ull * 1024 * 1024 * 1024)
515 available_ram = total_ram / 2;
516 else
517 available_ram = total_ram * 3 / 4;
518
519 /* 48bit address space max, with the lower 32MB reserved. We clamp
520 * things so it matches kmod VA range limitations.
521 */
522 uint64_t user_va_start =
523 panfrost_clamp_to_usable_va_range(dev->kmod.dev, PAN_VA_USER_START);
524 uint64_t user_va_end =
525 panfrost_clamp_to_usable_va_range(dev->kmod.dev, PAN_VA_USER_END);
526
527 /* We cannot support more than the VA limit */
528 RET((uint64_t[]){MIN2(available_ram, user_va_end - user_va_start)});
529 }
530
531 case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
532 RET((uint64_t[]){32768});
533
534 case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
535 case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
536 RET((uint64_t[]){4096});
537
538 case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
539 RET((uint32_t[]){800 /* MHz -- TODO */});
540
541 case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
542 RET((uint32_t[]){dev->core_count});
543
544 case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
545 RET((uint32_t[]){1});
546
547 case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
548 RET((uint32_t[]){pan_subgroup_size(dev->arch)});
549
550 case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
551 RET((uint32_t[]){0 /* TODO */});
552
553 case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
554 RET((uint64_t[]){1024}); // TODO
555 }
556
557 return 0;
558 }
559
560 static void
panfrost_init_screen_caps(struct panfrost_screen * screen)561 panfrost_init_screen_caps(struct panfrost_screen *screen)
562 {
563 struct pipe_caps *caps = (struct pipe_caps *)&screen->base.caps;
564
565 u_init_pipe_screen_caps(&screen->base, 1);
566
567 struct panfrost_device *dev = &screen->dev;
568
569 /* Our GL 3.x implementation is WIP */
570 bool is_gl3 = dev->debug & PAN_DBG_GL3;
571
572 /* Native MRT is introduced with v5 */
573 bool has_mrt = (dev->arch >= 5);
574
575 caps->npot_textures = true;
576 caps->mixed_color_depth_bits = true;
577 caps->fragment_shader_texture_lod = true;
578 caps->vertex_color_unclamped = true;
579 caps->depth_clip_disable = true;
580 caps->mixed_framebuffer_sizes = true;
581 caps->frontend_noop = true;
582 caps->sample_shading = true;
583 caps->fragment_shader_derivatives = true;
584 caps->framebuffer_no_attachment = true;
585 caps->quads_follow_provoking_vertex_convention = true;
586 caps->shader_pack_half_float = true;
587 caps->has_const_bw = true;
588
589 /* Removed in v9 (Valhall) */
590 caps->depth_clip_disable_separate = dev->arch < 9;
591
592 caps->max_render_targets =
593 caps->fbfetch = has_mrt ? 8 : 1;
594 caps->fbfetch_coherent = true;
595
596 caps->max_dual_source_render_targets = 1;
597
598 caps->occlusion_query = true;
599 caps->primitive_restart = true;
600 caps->primitive_restart_fixed_index = true;
601
602 caps->anisotropic_filter =
603 panfrost_device_gpu_rev(dev) >= dev->model->min_rev_anisotropic;
604
605 /* Compile side is done for Bifrost, Midgard TODO. Needs some kernel
606 * work to turn on, since CYCLE_COUNT_START needs to be issued. In
607 * kbase, userspace requests this via BASE_JD_REQ_PERMON. There is not
608 * yet way to request this with mainline TODO */
609 caps->shader_clock = false;
610
611 caps->vs_instanceid = true;
612 caps->texture_multisample = true;
613 caps->surface_sample_count = true;
614
615 caps->sampler_view_target = true;
616 caps->clip_halfz = true;
617 caps->polygon_offset_clamp = true;
618 caps->texture_swizzle = true;
619 caps->texture_mirror_clamp_to_edge = true;
620 caps->vertex_element_instance_divisor = true;
621 caps->blend_equation_separate = true;
622 caps->indep_blend_enable = true;
623 caps->indep_blend_func = true;
624 caps->generate_mipmap = true;
625 caps->uma = true;
626 caps->texture_float_linear = true;
627 caps->texture_half_float_linear = true;
628 caps->shader_array_components = true;
629 caps->texture_buffer_objects = true;
630 caps->packed_uniforms = true;
631 caps->image_load_formatted = true;
632 caps->cube_map_array = true;
633 caps->compute = true;
634 caps->int64 = true;
635
636 caps->copy_between_compressed_and_plain_formats = true;
637
638 caps->max_stream_output_buffers = PIPE_MAX_SO_BUFFERS;
639
640 caps->max_stream_output_separate_components =
641 caps->max_stream_output_interleaved_components = PIPE_MAX_SO_OUTPUTS;
642
643 caps->stream_output_pause_resume = true;
644 caps->stream_output_interleave_buffers = true;
645
646 caps->max_texture_array_layers = 2048;
647
648 caps->glsl_feature_level =
649 caps->glsl_feature_level_compatibility = is_gl3 ? 330 : 140;
650 caps->essl_feature_level = dev->arch >= 6 ? 320 : 310;
651
652 caps->constant_buffer_offset_alignment = 16;
653
654 /* v7 (only) restricts component orders with AFBC. To workaround, we
655 * compose format swizzles with texture swizzles. pan_texture.c motsly
656 * handles this but we need to fix up the border colour.
657 */
658 caps->texture_border_color_quirk = dev->arch == 7 || dev->arch >= 10 ?
659 PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_FREEDRENO : 0;
660
661 caps->max_texel_buffer_elements = PAN_MAX_TEXEL_BUFFER_ELEMENTS;
662
663 /* Must be at least 64 for correct behaviour */
664 caps->texture_buffer_offset_alignment = 64;
665
666 caps->query_time_elapsed =
667 caps->query_timestamp =
668 dev->kmod.props.gpu_can_query_timestamp &&
669 dev->kmod.props.timestamp_frequency != 0;
670
671 if (caps->query_timestamp)
672 caps->timer_resolution = pan_gpu_time_to_ns(dev, 1);
673
674 /* The hardware requires element alignment for data conversion to work
675 * as expected. If data conversion is not required, this restriction is
676 * lifted on Midgard at a performance penalty. We conservatively
677 * require element alignment for vertex buffers, using u_vbuf to
678 * translate to match the hardware requirement.
679 *
680 * This is less heavy-handed than PIPE_VERTEX_INPUT_ALIGNMENT_4BYTE, which
681 * would needlessly require alignment even for 8-bit formats.
682 */
683 caps->vertex_input_alignment = PIPE_VERTEX_INPUT_ALIGNMENT_ELEMENT;
684
685 caps->max_texture_2d_size = 1 << (PAN_MAX_MIP_LEVELS - 1);
686
687 caps->max_texture_3d_levels =
688 caps->max_texture_cube_levels = PAN_MAX_MIP_LEVELS;
689
690 /* pixel coord is in integer sysval on bifrost. */
691 caps->fs_coord_pixel_center_integer = dev->arch >= 6;
692 caps->fs_coord_pixel_center_half_integer = dev->arch < 6;
693
694 /* Hardware is upper left */
695 caps->fs_coord_origin_lower_left = false;
696
697 caps->fs_coord_origin_upper_left = true;
698 caps->tgsi_texcoord = true;
699
700 /* We would prefer varyings on Midgard, but proper sysvals on Bifrost */
701 caps->fs_face_is_integer_sysval =
702 caps->fs_position_is_sysval =
703 caps->fs_point_is_sysval = dev->arch >= 6;
704
705 caps->seamless_cube_map = true;
706 caps->seamless_cube_map_per_texture = true;
707
708 caps->max_vertex_element_src_offset = 0xffff;
709
710 caps->texture_transfer_modes = 0;
711
712 caps->endianness = PIPE_ENDIAN_NATIVE;
713
714 caps->max_texture_gather_components = 4;
715
716 caps->min_texture_gather_offset = -8;
717
718 caps->max_texture_gather_offset = 7;
719
720 uint64_t system_memory;
721 caps->video_memory = os_get_total_physical_memory(&system_memory) ?
722 system_memory >> 20 : 0;
723
724 caps->shader_stencil_export = true;
725 caps->conditional_render = true;
726 caps->conditional_render_inverted = true;
727
728 caps->shader_buffer_offset_alignment = 4;
729
730 caps->max_varyings = dev->arch >= 9 ? 16 : 32;
731
732 /* Removed in v6 (Bifrost) */
733 caps->gl_clamp =
734 caps->texture_mirror_clamp =
735 caps->alpha_test = dev->arch <= 5;
736
737 /* Removed in v9 (Valhall). PRIMTIIVE_RESTART_FIXED_INDEX is of course
738 * still supported as it is core GLES3.0 functionality
739 */
740 caps->emulate_nonfixed_primitive_restart = dev->arch >= 9;
741
742 caps->flatshade = false;
743 caps->two_sided_color = false;
744 caps->clip_planes = 0;
745
746 caps->packed_stream_output = false;
747
748 caps->viewport_transform_lowered = true;
749 caps->psiz_clamped = true;
750
751 caps->nir_images_as_deref = false;
752
753 caps->draw_indirect = true;
754
755 caps->multi_draw_indirect = dev->arch >= 10;
756
757 caps->start_instance =
758 caps->draw_parameters = pan_is_bifrost(dev);
759
760 /* Mali supports GLES and QUADS. Midgard and v6 Bifrost
761 * support more */
762 uint32_t modes = BITFIELD_MASK(MESA_PRIM_QUADS + 1);
763
764 if (dev->arch <= 6) {
765 modes |= BITFIELD_BIT(MESA_PRIM_QUAD_STRIP);
766 modes |= BITFIELD_BIT(MESA_PRIM_POLYGON);
767 }
768
769 if (dev->arch >= 9) {
770 /* Although Valhall is supposed to support quads, they
771 * don't seem to work correctly. Disable to fix
772 * arb-provoking-vertex-render.
773 */
774 modes &= ~BITFIELD_BIT(MESA_PRIM_QUADS);
775 }
776
777 caps->supported_prim_modes =
778 caps->supported_prim_modes_with_restart = modes;
779
780 caps->image_store_formatted = true;
781
782 caps->native_fence_fd = true;
783
784 caps->context_priority_mask =
785 from_kmod_group_allow_priority_flags(
786 dev->kmod.props.allowed_group_priorities_mask);
787
788 caps->astc_decode_mode = dev->arch >= 9 && (dev->compressed_formats & (1 << 30));
789
790 caps->min_line_width =
791 caps->min_line_width_aa =
792 caps->min_point_size =
793 caps->min_point_size_aa = 1;
794
795 caps->point_size_granularity =
796 caps->line_width_granularity = 0.0625;
797
798 caps->max_line_width =
799 caps->max_line_width_aa =
800 caps->max_point_size =
801 caps->max_point_size_aa = 4095.9375;
802
803 caps->max_texture_anisotropy = 16.0;
804
805 caps->max_texture_lod_bias = 16.0; /* arbitrary */
806 }
807
808 static void
panfrost_destroy_screen(struct pipe_screen * pscreen)809 panfrost_destroy_screen(struct pipe_screen *pscreen)
810 {
811 struct panfrost_device *dev = pan_device(pscreen);
812 struct panfrost_screen *screen = pan_screen(pscreen);
813
814 panfrost_resource_screen_destroy(pscreen);
815 panfrost_pool_cleanup(&screen->mempools.bin);
816 panfrost_pool_cleanup(&screen->mempools.desc);
817 pan_blend_shader_cache_cleanup(&dev->blend_shaders);
818
819 if (screen->vtbl.screen_destroy)
820 screen->vtbl.screen_destroy(pscreen);
821
822 if (dev->ro)
823 dev->ro->destroy(dev->ro);
824 panfrost_close_device(dev);
825
826 disk_cache_destroy(screen->disk_cache);
827 ralloc_free(pscreen);
828 }
829
830 static const void *
panfrost_screen_get_compiler_options(struct pipe_screen * pscreen,enum pipe_shader_ir ir,enum pipe_shader_type shader)831 panfrost_screen_get_compiler_options(struct pipe_screen *pscreen,
832 enum pipe_shader_ir ir,
833 enum pipe_shader_type shader)
834 {
835 return pan_screen(pscreen)->vtbl.get_compiler_options();
836 }
837
838 static struct disk_cache *
panfrost_get_disk_shader_cache(struct pipe_screen * pscreen)839 panfrost_get_disk_shader_cache(struct pipe_screen *pscreen)
840 {
841 return pan_screen(pscreen)->disk_cache;
842 }
843
/* pipe_screen::get_screen_fd hook: return the file descriptor of the
 * underlying device. */
static int
panfrost_get_screen_fd(struct pipe_screen *pscreen)
{
   struct panfrost_device *device = pan_device(pscreen);

   return panfrost_device_fd(device);
}
849
850 int
panfrost_get_driver_query_info(struct pipe_screen * pscreen,unsigned index,struct pipe_driver_query_info * info)851 panfrost_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
852 struct pipe_driver_query_info *info)
853 {
854 int num_queries = ARRAY_SIZE(panfrost_driver_query_list);
855
856 if (!info)
857 return num_queries;
858
859 if (index >= num_queries)
860 return 0;
861
862 *info = panfrost_driver_query_list[index];
863
864 return 1;
865 }
866
867 static uint64_t
panfrost_get_timestamp(struct pipe_screen * pscreen)868 panfrost_get_timestamp(struct pipe_screen *pscreen)
869 {
870 struct panfrost_device *dev = pan_device(pscreen);
871
872 return pan_gpu_time_to_ns(dev, pan_kmod_query_timestamp(dev->kmod.dev));
873 }
874
875 struct pipe_screen *
panfrost_create_screen(int fd,const struct pipe_screen_config * config,struct renderonly * ro)876 panfrost_create_screen(int fd, const struct pipe_screen_config *config,
877 struct renderonly *ro)
878 {
879 /* Create the screen */
880 struct panfrost_screen *screen = rzalloc(NULL, struct panfrost_screen);
881
882 if (!screen)
883 return NULL;
884
885 struct panfrost_device *dev = pan_device(&screen->base);
886
887 driParseConfigFiles(config->options, config->options_info, 0,
888 "panfrost", NULL, NULL, NULL, 0, NULL, 0);
889
890 /* Debug must be set first for pandecode to work correctly */
891 dev->debug =
892 debug_get_flags_option("PAN_MESA_DEBUG", panfrost_debug_options, 0);
893 screen->max_afbc_packing_ratio = debug_get_num_option(
894 "PAN_MAX_AFBC_PACKING_RATIO", DEFAULT_MAX_AFBC_PACKING_RATIO);
895
896 if (panfrost_open_device(screen, fd, dev)) {
897 ralloc_free(screen);
898 return NULL;
899 }
900
901 if (dev->debug & PAN_DBG_NO_AFBC)
902 dev->has_afbc = false;
903
904 /* Bail early on unsupported hardware */
905 if (dev->model == NULL) {
906 debug_printf("panfrost: Unsupported model %X",
907 panfrost_device_gpu_id(dev));
908 panfrost_destroy_screen(&(screen->base));
909 return NULL;
910 }
911
912 screen->force_afbc_packing = dev->debug & PAN_DBG_FORCE_PACK;
913 if (!screen->force_afbc_packing)
914 screen->force_afbc_packing = driQueryOptionb(config->options,
915 "pan_force_afbc_packing");
916
917 const char *option = debug_get_option("PAN_AFRC_RATE", NULL);
918 if (!option) {
919 screen->force_afrc_rate = -1;
920 } else if (strcmp(option, "default") == 0) {
921 screen->force_afrc_rate = PIPE_COMPRESSION_FIXED_RATE_DEFAULT;
922 } else {
923 int64_t rate =
924 debug_parse_num_option(option, PIPE_COMPRESSION_FIXED_RATE_NONE);
925 screen->force_afrc_rate = rate;
926 }
927
928 screen->csf_tiler_heap.chunk_size = driQueryOptioni(config->options,
929 "pan_csf_chunk_size");
930 screen->csf_tiler_heap.initial_chunks = driQueryOptioni(config->options,
931 "pan_csf_initial_chunks");
932 screen->csf_tiler_heap.max_chunks = driQueryOptioni(config->options,
933 "pan_csf_max_chunks");
934
935 dev->ro = ro;
936
937 screen->base.destroy = panfrost_destroy_screen;
938
939 screen->base.get_screen_fd = panfrost_get_screen_fd;
940 screen->base.get_name = panfrost_get_name;
941 screen->base.get_vendor = panfrost_get_vendor;
942 screen->base.get_device_vendor = panfrost_get_device_vendor;
943 screen->base.get_driver_query_info = panfrost_get_driver_query_info;
944 screen->base.get_shader_param = panfrost_get_shader_param;
945 screen->base.get_compute_param = panfrost_get_compute_param;
946 screen->base.get_timestamp = panfrost_get_timestamp;
947 screen->base.is_format_supported = panfrost_is_format_supported;
948 screen->base.query_dmabuf_modifiers = panfrost_query_dmabuf_modifiers;
949 screen->base.is_dmabuf_modifier_supported =
950 panfrost_is_dmabuf_modifier_supported;
951 screen->base.context_create = panfrost_create_context;
952 screen->base.get_compiler_options = panfrost_screen_get_compiler_options;
953 screen->base.get_disk_shader_cache = panfrost_get_disk_shader_cache;
954 screen->base.fence_reference = panfrost_fence_reference;
955 screen->base.fence_finish = panfrost_fence_finish;
956 screen->base.fence_get_fd = panfrost_fence_get_fd;
957 screen->base.set_damage_region = panfrost_resource_set_damage_region;
958 screen->base.query_compression_rates = panfrost_query_compression_rates;
959 screen->base.query_compression_modifiers =
960 panfrost_query_compression_modifiers;
961
962 panfrost_resource_screen_init(&screen->base);
963 pan_blend_shader_cache_init(&dev->blend_shaders,
964 panfrost_device_gpu_id(dev));
965
966 panfrost_init_screen_caps(screen);
967
968 panfrost_disk_cache_init(screen);
969
970 if (panfrost_pool_init(&screen->mempools.bin, NULL, dev, PAN_BO_EXECUTE,
971 4096, "Preload shaders", false, true) ||
972 panfrost_pool_init(&screen->mempools.desc, NULL, dev, 0, 65536,
973 "Preload RSDs", false, true)) {
974 panfrost_destroy_screen(&(screen->base));
975 return NULL;
976 }
977
978 if (dev->arch == 4)
979 panfrost_cmdstream_screen_init_v4(screen);
980 else if (dev->arch == 5)
981 panfrost_cmdstream_screen_init_v5(screen);
982 else if (dev->arch == 6)
983 panfrost_cmdstream_screen_init_v6(screen);
984 else if (dev->arch == 7)
985 panfrost_cmdstream_screen_init_v7(screen);
986 else if (dev->arch == 9)
987 panfrost_cmdstream_screen_init_v9(screen);
988 else if (dev->arch == 10)
989 panfrost_cmdstream_screen_init_v10(screen);
990 else
991 unreachable("Unhandled architecture major");
992
993 return &screen->base;
994 }
995