1 /*
2 * Copyright (C) 2008 VMware, Inc.
3 * Copyright (C) 2014 Broadcom
4 * Copyright (C) 2018 Alyssa Rosenzweig
5 * Copyright (C) 2019 Collabora, Ltd.
6 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 * SOFTWARE.
26 *
27 */
28
29 #include "draw/draw_context.h"
30 #include "pipe/p_defines.h"
31 #include "pipe/p_screen.h"
32 #include "util/format/u_format.h"
33 #include "util/format/u_format_s3tc.h"
34 #include "util/os_time.h"
35 #include "util/u_debug.h"
36 #include "util/u_memory.h"
37 #include "util/u_process.h"
38 #include "util/u_screen.h"
39 #include "util/u_video.h"
40 #include "util/xmlconfig.h"
41
42 #include <fcntl.h>
43
44 #include "drm-uapi/drm_fourcc.h"
45 #include "drm-uapi/panfrost_drm.h"
46
47 #include "decode.h"
48 #include "pan_bo.h"
49 #include "pan_fence.h"
50 #include "pan_public.h"
51 #include "pan_resource.h"
52 #include "pan_screen.h"
53 #include "pan_shader.h"
54 #include "pan_util.h"
55
56 #include "pan_context.h"
57
58 #define DEFAULT_MAX_AFBC_PACKING_RATIO 90
59
60 /* clang-format off */
/* Table of flag names handed to debug_get_flags_option() when the
 * PAN_MESA_DEBUG option is parsed at screen creation. Each entry is
 * {option name, PAN_DBG_* bit, help string}; DEBUG_NAMED_VALUE_END terminates
 * the list. The "overflow" entry only exists when PAN_DBG_OVERFLOW is
 * defined. */
static const struct debug_named_value panfrost_debug_options[] = {
   {"perf",       PAN_DBG_PERF,     "Enable performance warnings"},
   {"trace",      PAN_DBG_TRACE,    "Trace the command stream"},
   {"dirty",      PAN_DBG_DIRTY,    "Always re-emit all state"},
   {"sync",       PAN_DBG_SYNC,     "Wait for each job's completion and abort on GPU faults"},
   {"nofp16",     PAN_DBG_NOFP16,    "Disable 16-bit support"},
   {"gl3",        PAN_DBG_GL3,      "Enable experimental GL 3.x implementation, up to 3.3"},
   {"noafbc",     PAN_DBG_NO_AFBC,  "Disable AFBC support"},
   {"nocrc",      PAN_DBG_NO_CRC,   "Disable transaction elimination"},
   {"msaa16",     PAN_DBG_MSAA16,   "Enable MSAA 8x and 16x support"},
   {"linear",     PAN_DBG_LINEAR,   "Force linear textures"},
   {"nocache",    PAN_DBG_NO_CACHE, "Disable BO cache"},
   {"dump",       PAN_DBG_DUMP,     "Dump all graphics memory"},
#ifdef PAN_DBG_OVERFLOW
   {"overflow",   PAN_DBG_OVERFLOW, "Check for buffer overflows in pool uploads"},
#endif
   {"yuv",        PAN_DBG_YUV,      "Tint YUV textures with blue for 1-plane and green for 2-plane"},
   {"forcepack",  PAN_DBG_FORCE_PACK,  "Force packing of AFBC textures on upload"},
   {"cs",         PAN_DBG_CS,       "Enable extra checks in command stream"},
   DEBUG_NAMED_VALUE_END
};
82 /* clang-format on */
83
84 static const char *
panfrost_get_name(struct pipe_screen * screen)85 panfrost_get_name(struct pipe_screen *screen)
86 {
87 return pan_device(screen)->model->name;
88 }
89
/* pipe_screen::get_vendor hook: the driver vendor string is a constant and
 * does not depend on the screen. */
static const char *
panfrost_get_vendor(struct pipe_screen *screen)
{
   static const char driver_vendor[] = "Mesa";

   return driver_vendor;
}
95
/* pipe_screen::get_device_vendor hook: the hardware vendor string is a
 * constant and does not depend on the screen. */
static const char *
panfrost_get_device_vendor(struct pipe_screen *screen)
{
   static const char hw_vendor[] = "Arm";

   return hw_vendor;
}
101
102 static int
from_kmod_group_allow_priority_flags(enum pan_kmod_group_allow_priority_flags kmod_flags)103 from_kmod_group_allow_priority_flags(
104 enum pan_kmod_group_allow_priority_flags kmod_flags)
105 {
106 int flags = 0;
107
108 if (kmod_flags & PAN_KMOD_GROUP_ALLOW_PRIORITY_REALTIME)
109 flags |= PIPE_CONTEXT_PRIORITY_REALTIME;
110
111 if (kmod_flags & PAN_KMOD_GROUP_ALLOW_PRIORITY_HIGH)
112 flags |= PIPE_CONTEXT_PRIORITY_HIGH;
113
114 if (kmod_flags & PAN_KMOD_GROUP_ALLOW_PRIORITY_MEDIUM)
115 flags |= PIPE_CONTEXT_PRIORITY_MEDIUM;
116
117 if (kmod_flags & PAN_KMOD_GROUP_ALLOW_PRIORITY_LOW)
118 flags |= PIPE_CONTEXT_PRIORITY_LOW;
119
120 return flags;
121 }
122
123 static int
panfrost_get_shader_param(struct pipe_screen * screen,enum pipe_shader_type shader,enum pipe_shader_cap param)124 panfrost_get_shader_param(struct pipe_screen *screen,
125 enum pipe_shader_type shader,
126 enum pipe_shader_cap param)
127 {
128 struct panfrost_device *dev = pan_device(screen);
129 bool is_nofp16 = dev->debug & PAN_DBG_NOFP16;
130
131 switch (shader) {
132 case PIPE_SHADER_VERTEX:
133 case PIPE_SHADER_FRAGMENT:
134 case PIPE_SHADER_COMPUTE:
135 break;
136 default:
137 return 0;
138 }
139
140 /* We only allow observable side effects (memory writes) in compute and
141 * fragment shaders. Side effects in the geometry pipeline cause
142 * trouble with IDVS and conflict with our transform feedback lowering.
143 */
144 bool allow_side_effects = (shader != PIPE_SHADER_VERTEX);
145
146 switch (param) {
147 case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
148 case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
149 case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
150 case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
151 return 16384; /* arbitrary */
152
153 case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
154 return 1024; /* arbitrary */
155
156 case PIPE_SHADER_CAP_MAX_INPUTS:
157 /* Used as ABI on Midgard */
158 return 16;
159
160 case PIPE_SHADER_CAP_MAX_OUTPUTS:
161 return shader == PIPE_SHADER_FRAGMENT ? 8 : PIPE_MAX_ATTRIBS;
162
163 case PIPE_SHADER_CAP_MAX_TEMPS:
164 return 256; /* arbitrary */
165
166 case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE:
167 return 16 * 1024 * sizeof(float);
168
169 case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
170 STATIC_ASSERT(PAN_MAX_CONST_BUFFERS < 0x100);
171 return PAN_MAX_CONST_BUFFERS;
172
173 case PIPE_SHADER_CAP_CONT_SUPPORTED:
174 return 0;
175
176 case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
177 return dev->arch >= 6;
178
179 case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
180 return 1;
181
182 case PIPE_SHADER_CAP_SUBROUTINES:
183 return 0;
184
185 case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
186 return 0;
187
188 case PIPE_SHADER_CAP_INTEGERS:
189 return 1;
190
191 /* The Bifrost compiler supports full 16-bit. Midgard could but int16
192 * support is untested, so restrict INT16 to Bifrost. Midgard
193 * architecturally cannot support fp16 derivatives. */
194
195 case PIPE_SHADER_CAP_FP16:
196 case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
197 return !is_nofp16;
198 case PIPE_SHADER_CAP_FP16_DERIVATIVES:
199 case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
200 return dev->arch >= 6 && !is_nofp16;
201 case PIPE_SHADER_CAP_INT16:
202 /* Blocked on https://gitlab.freedesktop.org/mesa/mesa/-/issues/6075 */
203 return false;
204
205 case PIPE_SHADER_CAP_INT64_ATOMICS:
206 case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
207 return 0;
208
209 case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
210 STATIC_ASSERT(PIPE_MAX_SAMPLERS < 0x10000);
211 return PIPE_MAX_SAMPLERS;
212
213 case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
214 STATIC_ASSERT(PIPE_MAX_SHADER_SAMPLER_VIEWS < 0x10000);
215 return PIPE_MAX_SHADER_SAMPLER_VIEWS;
216
217 case PIPE_SHADER_CAP_SUPPORTED_IRS:
218 return (1 << PIPE_SHADER_IR_NIR);
219
220 case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
221 return allow_side_effects ? 16 : 0;
222
223 case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
224 return allow_side_effects ? PIPE_MAX_SHADER_IMAGES : 0;
225
226 case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
227 case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
228 return 0;
229
230 default:
231 return 0;
232 }
233
234 return 0;
235 }
236
237 static uint32_t
pipe_to_pan_bind_flags(uint32_t pipe_bind_flags)238 pipe_to_pan_bind_flags(uint32_t pipe_bind_flags)
239 {
240 static_assert(PIPE_BIND_DEPTH_STENCIL == PAN_BIND_DEPTH_STENCIL, "");
241 static_assert(PIPE_BIND_RENDER_TARGET == PAN_BIND_RENDER_TARGET, "");
242 static_assert(PIPE_BIND_SAMPLER_VIEW == PAN_BIND_SAMPLER_VIEW, "");
243 static_assert(PIPE_BIND_VERTEX_BUFFER == PAN_BIND_VERTEX_BUFFER, "");
244
245 return pipe_bind_flags & (PAN_BIND_DEPTH_STENCIL | PAN_BIND_RENDER_TARGET |
246 PAN_BIND_VERTEX_BUFFER | PAN_BIND_SAMPLER_VIEW);
247 }
248
249 /**
250 * Query format support for creating a texture, drawing surface, etc.
251 * \param format the format to test
252 * \param type one of PIPE_TEXTURE, PIPE_SURFACE
253 */
254 static bool
panfrost_is_format_supported(struct pipe_screen * screen,enum pipe_format format,enum pipe_texture_target target,unsigned sample_count,unsigned storage_sample_count,unsigned bind)255 panfrost_is_format_supported(struct pipe_screen *screen,
256 enum pipe_format format,
257 enum pipe_texture_target target,
258 unsigned sample_count,
259 unsigned storage_sample_count, unsigned bind)
260 {
261 struct panfrost_device *dev = pan_device(screen);
262
263 /* MSAA 2x gets rounded up to 4x. MSAA 8x/16x only supported on v5+.
264 * TODO: debug MSAA 8x/16x */
265
266 switch (sample_count) {
267 case 0:
268 case 1:
269 case 4:
270 break;
271 case 8:
272 case 16:
273 if (dev->debug & PAN_DBG_MSAA16)
274 break;
275 else
276 return false;
277 default:
278 return false;
279 }
280
281 if (MAX2(sample_count, 1) != MAX2(storage_sample_count, 1))
282 return false;
283
284 /* Z16 causes dEQP failures on t720 */
285 if (format == PIPE_FORMAT_Z16_UNORM && dev->arch <= 4)
286 return false;
287
288 /* Check we support the format with the given bind */
289
290 unsigned pan_bind_flags = pipe_to_pan_bind_flags(bind);
291 struct panfrost_format fmt = dev->formats[format];
292 unsigned fmt_bind_flags = fmt.bind;
293
294 /* Also check that compressed texture formats are supported on this
295 * particular chip. They may not be depending on system integration
296 * differences. */
297
298 bool supported =
299 !util_format_is_compressed(format) ||
300 panfrost_supports_compressed_format(dev, fmt.texfeat_bit);
301
302 if (!supported)
303 return false;
304
305 if (bind & PIPE_BIND_DEPTH_STENCIL) {
306 /* On panfrost, S8_UINT is actually stored as X8S8_UINT, which
307 * causes us headaches when we try to bind it as DEPTH_STENCIL;
308 * the gallium driver doesn't handle this correctly. So reject
309 * it for now.
310 */
311 switch (format) {
312 case PIPE_FORMAT_S8_UINT:
313 fmt_bind_flags &= ~PAN_BIND_DEPTH_STENCIL;
314 break;
315 default:
316 /* no other special handling required yet */
317 break;
318 }
319 }
320
321 return MALI_EXTRACT_INDEX(fmt.hw) &&
322 ((pan_bind_flags & ~fmt_bind_flags) == 0);
323 }
324
325 static void
panfrost_query_compression_rates(struct pipe_screen * screen,enum pipe_format format,int max,uint32_t * rates,int * count)326 panfrost_query_compression_rates(struct pipe_screen *screen,
327 enum pipe_format format, int max,
328 uint32_t *rates, int *count)
329 {
330 struct panfrost_device *dev = pan_device(screen);
331
332 if (!dev->has_afrc) {
333 *count = 0;
334 return;
335 }
336
337 *count = panfrost_afrc_query_rates(format, max, rates);
338 }
339
340 /* We always support linear and tiled operations, both external and internal.
341 * We support AFBC for a subset of formats, and colourspace transform for a
342 * subset of those. */
343
344 static void
panfrost_walk_dmabuf_modifiers(struct pipe_screen * screen,enum pipe_format format,int max,uint64_t * modifiers,unsigned int * external_only,int * out_count,uint64_t test_modifier,bool allow_afrc)345 panfrost_walk_dmabuf_modifiers(struct pipe_screen *screen,
346 enum pipe_format format, int max,
347 uint64_t *modifiers, unsigned int *external_only,
348 int *out_count, uint64_t test_modifier, bool allow_afrc)
349 {
350 /* Query AFBC status */
351 struct panfrost_device *dev = pan_device(screen);
352 bool afbc =
353 dev->has_afbc && panfrost_format_supports_afbc(dev->arch, format);
354 bool ytr = panfrost_afbc_can_ytr(format);
355 bool tiled_afbc = panfrost_afbc_can_tile(dev->arch);
356 bool afrc = allow_afrc && dev->has_afrc && panfrost_format_supports_afrc(format);
357
358 unsigned count = 0;
359
360 for (unsigned i = 0; i < PAN_MODIFIER_COUNT; ++i) {
361 if (drm_is_afbc(pan_best_modifiers[i])) {
362 if (!afbc)
363 continue;
364
365 if ((pan_best_modifiers[i] & AFBC_FORMAT_MOD_SPLIT) &&
366 !panfrost_afbc_can_split(dev->arch, format, pan_best_modifiers[i]))
367 continue;
368
369 if ((pan_best_modifiers[i] & AFBC_FORMAT_MOD_YTR) && !ytr)
370 continue;
371
372 if ((pan_best_modifiers[i] & AFBC_FORMAT_MOD_TILED) && !tiled_afbc)
373 continue;
374 }
375
376 if (drm_is_afrc(pan_best_modifiers[i]) && !afrc)
377 continue;
378
379 if (test_modifier != DRM_FORMAT_MOD_INVALID &&
380 test_modifier != pan_best_modifiers[i])
381 continue;
382
383 if (max > (int)count) {
384 modifiers[count] = pan_best_modifiers[i];
385
386 if (external_only)
387 external_only[count] = false;
388 }
389 count++;
390 }
391
392 *out_count = count;
393 }
394
395 static void
panfrost_query_dmabuf_modifiers(struct pipe_screen * screen,enum pipe_format format,int max,uint64_t * modifiers,unsigned int * external_only,int * out_count)396 panfrost_query_dmabuf_modifiers(struct pipe_screen *screen,
397 enum pipe_format format, int max,
398 uint64_t *modifiers,
399 unsigned int *external_only, int *out_count)
400 {
401 panfrost_walk_dmabuf_modifiers(screen, format, max, modifiers, external_only,
402 out_count, DRM_FORMAT_MOD_INVALID, true);
403 }
404
405 static void
panfrost_query_compression_modifiers(struct pipe_screen * screen,enum pipe_format format,uint32_t rate,int max,uint64_t * modifiers,int * count)406 panfrost_query_compression_modifiers(struct pipe_screen *screen,
407 enum pipe_format format, uint32_t rate,
408 int max, uint64_t *modifiers, int *count)
409 {
410 struct panfrost_device *dev = pan_device(screen);
411
412 if (rate == PIPE_COMPRESSION_FIXED_RATE_NONE)
413 /* no compression requested, return all non-afrc formats */
414 panfrost_walk_dmabuf_modifiers(screen, format, max, modifiers,
415 NULL, /* external_only */
416 count,
417 DRM_FORMAT_MOD_INVALID,
418 false /* disallow afrc */);
419 else if (dev->has_afrc)
420 *count = panfrost_afrc_get_modifiers(format, rate, max, modifiers);
421 else
422 *count = 0; /* compression requested but not supported */
423 }
424
425 static bool
panfrost_is_dmabuf_modifier_supported(struct pipe_screen * screen,uint64_t modifier,enum pipe_format format,bool * external_only)426 panfrost_is_dmabuf_modifier_supported(struct pipe_screen *screen,
427 uint64_t modifier,
428 enum pipe_format format,
429 bool *external_only)
430 {
431 uint64_t unused;
432 unsigned int uint_extern_only = 0;
433 int count;
434
435 panfrost_walk_dmabuf_modifiers(screen, format, 1, &unused, &uint_extern_only,
436 &count, modifier, true);
437
438 if (external_only)
439 *external_only = uint_extern_only ? true : false;
440
441 return count > 0;
442 }
443
444 static int
panfrost_get_compute_param(struct pipe_screen * pscreen,enum pipe_shader_ir ir_type,enum pipe_compute_cap param,void * ret)445 panfrost_get_compute_param(struct pipe_screen *pscreen,
446 enum pipe_shader_ir ir_type,
447 enum pipe_compute_cap param, void *ret)
448 {
449 struct panfrost_device *dev = pan_device(pscreen);
450 const char *const ir = "panfrost";
451
452 #define RET(x) \
453 do { \
454 if (ret) \
455 memcpy(ret, x, sizeof(x)); \
456 return sizeof(x); \
457 } while (0)
458
459 switch (param) {
460 case PIPE_COMPUTE_CAP_ADDRESS_BITS:
461 RET((uint32_t[]){64});
462
463 case PIPE_COMPUTE_CAP_IR_TARGET:
464 if (ret)
465 sprintf(ret, "%s", ir);
466 return strlen(ir) * sizeof(char);
467
468 case PIPE_COMPUTE_CAP_GRID_DIMENSION:
469 RET((uint64_t[]){3});
470
471 case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
472 RET(((uint64_t[]){65535, 65535, 65535}));
473
474 case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
475 /* Unpredictable behaviour at larger sizes. Mali-G52 advertises
476 * 384x384x384.
477 *
478 * On Midgard, we don't allow more than 128 threads in each
479 * direction to match PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK.
480 * That still exceeds the minimum-maximum.
481 */
482 if (dev->arch >= 6)
483 RET(((uint64_t[]){256, 256, 256}));
484 else
485 RET(((uint64_t[]){128, 128, 128}));
486
487 case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
488 /* On Bifrost and newer, all GPUs can support at least 256 threads
489 * regardless of register usage, so we report 256.
490 *
491 * On Midgard, with maximum register usage, the maximum
492 * thread count is only 64. We would like to report 64 here, but
493 * the GLES3.1 spec minimum is 128, so we report 128 and limit
494 * the register allocation of affected compute kernels.
495 */
496 RET((uint64_t[]){dev->arch >= 6 ? 256 : 128});
497
498 case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
499 case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: {
500 uint64_t total_ram;
501
502 if (!os_get_total_physical_memory(&total_ram))
503 return 0;
504
505 /* We don't want to burn too much ram with the GPU. If the user has 4GiB
506 * or less, we use at most half. If they have more than 4GiB, we use 3/4.
507 */
508 uint64_t available_ram;
509 if (total_ram <= 4ull * 1024 * 1024 * 1024)
510 available_ram = total_ram / 2;
511 else
512 available_ram = total_ram * 3 / 4;
513
514 /* 48bit address space max, with the lower 32MB reserved. We clamp
515 * things so it matches kmod VA range limitations.
516 */
517 uint64_t user_va_start =
518 panfrost_clamp_to_usable_va_range(dev->kmod.dev, PAN_VA_USER_START);
519 uint64_t user_va_end =
520 panfrost_clamp_to_usable_va_range(dev->kmod.dev, PAN_VA_USER_END);
521
522 /* We cannot support more than the VA limit */
523 RET((uint64_t[]){MIN2(available_ram, user_va_end - user_va_start)});
524 }
525
526 case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
527 RET((uint64_t[]){32768});
528
529 case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
530 case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
531 RET((uint64_t[]){4096});
532
533 case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
534 RET((uint32_t[]){800 /* MHz -- TODO */});
535
536 case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
537 RET((uint32_t[]){dev->core_count});
538
539 case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
540 RET((uint32_t[]){1});
541
542 case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
543 RET((uint32_t[]){pan_subgroup_size(dev->arch)});
544
545 case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
546 RET((uint32_t[]){0 /* TODO */});
547
548 case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
549 RET((uint64_t[]){1024}); // TODO
550 }
551
552 return 0;
553 }
554
/*
 * Fill in screen->base.caps with the capabilities this driver advertises.
 *
 * u_init_pipe_screen_caps() is called first (presumably to install generic
 * defaults -- confirm against util/u_screen), then the fields below are
 * overridden. Several values depend on the GPU architecture major
 * (dev->arch), on debug flags (dev->debug) and on properties reported by the
 * kernel driver (dev->kmod.props).
 */
static void
panfrost_init_screen_caps(struct panfrost_screen *screen)
{
   /* The cast yields a writable pointer to base.caps (NOTE(review): this
    * suggests the field is const-qualified in pipe_screen -- confirm) */
   struct pipe_caps *caps = (struct pipe_caps *)&screen->base.caps;

   u_init_pipe_screen_caps(&screen->base, 1);

   struct panfrost_device *dev = &screen->dev;

   /* Our GL 3.x implementation is WIP */
   bool is_gl3 = dev->debug & PAN_DBG_GL3;

   /* Native MRT is introduced with v5 */
   bool has_mrt = (dev->arch >= 5);

   caps->npot_textures = true;
   caps->mixed_color_depth_bits = true;
   caps->fragment_shader_texture_lod = true;
   caps->vertex_color_unclamped = true;
   caps->depth_clip_disable = true;
   caps->mixed_framebuffer_sizes = true;
   caps->frontend_noop = true;
   caps->sample_shading = true;
   caps->fragment_shader_derivatives = true;
   caps->framebuffer_no_attachment = true;
   caps->quads_follow_provoking_vertex_convention = true;
   caps->shader_pack_half_float = true;
   caps->has_const_bw = true;

   /* Removed in v9 (Valhall) */
   caps->depth_clip_disable_separate = dev->arch < 9;

   /* fbfetch is assigned the same count as max_render_targets */
   caps->max_render_targets =
   caps->fbfetch = has_mrt ? 8 : 1;
   caps->fbfetch_coherent = true;

   caps->max_dual_source_render_targets = 1;

   caps->occlusion_query = true;
   caps->primitive_restart = true;
   caps->primitive_restart_fixed_index = true;

   /* Anisotropic filtering needs a minimum GPU revision, per model */
   caps->anisotropic_filter =
      panfrost_device_gpu_rev(dev) >= dev->model->min_rev_anisotropic;

   /* Compile side is done for Bifrost, Midgard TODO. Needs some kernel
    * work to turn on, since CYCLE_COUNT_START needs to be issued. In
    * kbase, userspace requests this via BASE_JD_REQ_PERMON. There is not
    * yet way to request this with mainline TODO */
   caps->shader_clock = false;

   caps->vs_instanceid = true;
   caps->texture_multisample = true;
   caps->surface_sample_count = true;

   caps->sampler_view_target = true;
   caps->clip_halfz = true;
   caps->polygon_offset_clamp = true;
   caps->texture_swizzle = true;
   caps->texture_mirror_clamp_to_edge = true;
   caps->vertex_element_instance_divisor = true;
   caps->blend_equation_separate = true;
   caps->indep_blend_enable = true;
   caps->indep_blend_func = true;
   caps->generate_mipmap = true;
   caps->uma = true;
   caps->texture_float_linear = true;
   caps->texture_half_float_linear = true;
   caps->shader_array_components = true;
   caps->texture_buffer_objects = true;
   caps->packed_uniforms = true;
   caps->image_load_formatted = true;
   caps->cube_map_array = true;
   caps->compute = true;
   caps->int64 = true;

   caps->copy_between_compressed_and_plain_formats = true;

   caps->max_stream_output_buffers = PIPE_MAX_SO_BUFFERS;

   caps->max_stream_output_separate_components =
   caps->max_stream_output_interleaved_components = PIPE_MAX_SO_OUTPUTS;

   caps->stream_output_pause_resume = true;
   caps->stream_output_interleave_buffers = true;

   caps->max_texture_array_layers = 2048;

   /* Desktop GLSL level follows the "gl3" debug flag; ESSL level follows
    * the architecture */
   caps->glsl_feature_level =
   caps->glsl_feature_level_compatibility = is_gl3 ? 330 : 140;
   caps->essl_feature_level = dev->arch >= 6 ? 320 : 310;

   caps->constant_buffer_offset_alignment = 16;

   /* v7 (only) restricts component orders with AFBC. To workaround, we
    * compose format swizzles with texture swizzles. pan_texture.c mostly
    * handles this but we need to fix up the border colour.
    *
    * NOTE(review): the condition below also enables the quirk for
    * arch >= 10, which the explanation above does not cover -- confirm the
    * reason.
    */
   caps->texture_border_color_quirk = dev->arch == 7 || dev->arch >= 10 ?
      PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_FREEDRENO : 0;

   caps->max_texel_buffer_elements = PAN_MAX_TEXEL_BUFFER_ELEMENTS;

   /* Must be at least 64 for correct behaviour */
   caps->texture_buffer_offset_alignment = 64;

   /* Timestamp queries need both kernel support and a known frequency */
   caps->query_time_elapsed =
   caps->query_timestamp =
      dev->kmod.props.gpu_can_query_timestamp &&
      dev->kmod.props.timestamp_frequency != 0;

   /* Duration of one GPU timestamp tick, converted to nanoseconds */
   caps->timer_resolution = pan_gpu_time_to_ns(dev, 1);

   /* The hardware requires element alignment for data conversion to work
    * as expected. If data conversion is not required, this restriction is
    * lifted on Midgard at a performance penalty. We conservatively
    * require element alignment for vertex buffers, using u_vbuf to
    * translate to match the hardware requirement.
    *
    * This is less heavy-handed than PIPE_VERTEX_INPUT_ALIGNMENT_4BYTE, which
    * would needlessly require alignment even for 8-bit formats.
    */
   caps->vertex_input_alignment = PIPE_VERTEX_INPUT_ALIGNMENT_ELEMENT;

   /* Largest 2D size whose full mip chain fits in PAN_MAX_MIP_LEVELS */
   caps->max_texture_2d_size = 1 << (PAN_MAX_MIP_LEVELS - 1);

   caps->max_texture_3d_levels =
   caps->max_texture_cube_levels = PAN_MAX_MIP_LEVELS;

   /* pixel coord is in integer sysval on bifrost. */
   caps->fs_coord_pixel_center_integer = dev->arch >= 6;
   caps->fs_coord_pixel_center_half_integer = dev->arch < 6;

   /* Hardware is upper left */
   caps->fs_coord_origin_lower_left = false;

   caps->fs_coord_origin_upper_left = true;
   caps->tgsi_texcoord = true;

   /* We would prefer varyings on Midgard, but proper sysvals on Bifrost */
   caps->fs_face_is_integer_sysval =
   caps->fs_position_is_sysval =
   caps->fs_point_is_sysval = dev->arch >= 6;

   caps->seamless_cube_map = true;
   caps->seamless_cube_map_per_texture = true;

   caps->max_vertex_element_src_offset = 0xffff;

   caps->texture_transfer_modes = 0;

   caps->endianness = PIPE_ENDIAN_NATIVE;

   caps->max_texture_gather_components = 4;

   caps->min_texture_gather_offset = -8;

   caps->max_texture_gather_offset = 7;

   /* Total system memory shifted right by 20 bits (bytes -> MiB, assuming
    * os_get_total_physical_memory() reports bytes -- confirm); 0 when the
    * query fails */
   uint64_t system_memory;
   caps->video_memory = os_get_total_physical_memory(&system_memory) ?
      system_memory >> 20 : 0;

   caps->shader_stencil_export = true;
   caps->conditional_render = true;
   caps->conditional_render_inverted = true;

   caps->shader_buffer_offset_alignment = 4;

   caps->max_varyings = dev->arch >= 9 ? 16 : 32;

   /* Removed in v6 (Bifrost) */
   caps->gl_clamp =
   caps->texture_mirror_clamp =
   caps->alpha_test = dev->arch <= 5;

   /* Removed in v9 (Valhall). PRIMITIVE_RESTART_FIXED_INDEX is of course
    * still supported as it is core GLES3.0 functionality
    */
   caps->emulate_nonfixed_primitive_restart = dev->arch >= 9;

   caps->flatshade = false;
   caps->two_sided_color = false;
   caps->clip_planes = 0;

   caps->packed_stream_output = false;

   caps->viewport_transform_lowered = true;
   caps->psiz_clamped = true;

   caps->nir_images_as_deref = false;

   caps->draw_indirect = true;

   caps->multi_draw_indirect = dev->arch >= 10;

   caps->start_instance =
   caps->draw_parameters = pan_is_bifrost(dev);

   /* Mali supports GLES and QUADS. Midgard and v6 Bifrost
    * support more */
   uint32_t modes = BITFIELD_MASK(MESA_PRIM_QUADS + 1);

   if (dev->arch <= 6) {
      modes |= BITFIELD_BIT(MESA_PRIM_QUAD_STRIP);
      modes |= BITFIELD_BIT(MESA_PRIM_POLYGON);
   }

   if (dev->arch >= 9) {
      /* Although Valhall is supposed to support quads, they
       * don't seem to work correctly. Disable to fix
       * arb-provoking-vertex-render.
       */
      modes &= ~BITFIELD_BIT(MESA_PRIM_QUADS);
   }

   caps->supported_prim_modes =
   caps->supported_prim_modes_with_restart = modes;

   caps->image_store_formatted = true;

   caps->native_fence_fd = true;

   /* Expose the context priorities the kernel driver allows */
   caps->context_priority_mask =
      from_kmod_group_allow_priority_flags(
         dev->kmod.props.allowed_group_priorities_mask);

   /* NOTE(review): bit 30 of compressed_formats is tested as a raw
    * constant; which feature it names is not visible here -- confirm */
   caps->astc_decode_mode = dev->arch >= 9 && (dev->compressed_formats & (1 << 30));

   caps->min_line_width =
   caps->min_line_width_aa =
   caps->min_point_size =
   caps->min_point_size_aa = 1;

   caps->point_size_granularity =
   caps->line_width_granularity = 0.0625;

   caps->max_line_width =
   caps->max_line_width_aa =
   caps->max_point_size =
   caps->max_point_size_aa = 4095.9375;

   caps->max_texture_anisotropy = 16.0;

   caps->max_texture_lod_bias = 16.0; /* arbitrary */
}
801
802 static void
panfrost_destroy_screen(struct pipe_screen * pscreen)803 panfrost_destroy_screen(struct pipe_screen *pscreen)
804 {
805 struct panfrost_device *dev = pan_device(pscreen);
806 struct panfrost_screen *screen = pan_screen(pscreen);
807
808 panfrost_resource_screen_destroy(pscreen);
809 panfrost_pool_cleanup(&screen->mempools.bin);
810 panfrost_pool_cleanup(&screen->mempools.desc);
811 pan_blend_shader_cache_cleanup(&dev->blend_shaders);
812
813 if (screen->vtbl.screen_destroy)
814 screen->vtbl.screen_destroy(pscreen);
815
816 if (dev->ro)
817 dev->ro->destroy(dev->ro);
818 panfrost_close_device(dev);
819
820 disk_cache_destroy(screen->disk_cache);
821 ralloc_free(pscreen);
822 }
823
824 static const void *
panfrost_screen_get_compiler_options(struct pipe_screen * pscreen,enum pipe_shader_ir ir,enum pipe_shader_type shader)825 panfrost_screen_get_compiler_options(struct pipe_screen *pscreen,
826 enum pipe_shader_ir ir,
827 enum pipe_shader_type shader)
828 {
829 return pan_screen(pscreen)->vtbl.get_compiler_options();
830 }
831
832 static struct disk_cache *
panfrost_get_disk_shader_cache(struct pipe_screen * pscreen)833 panfrost_get_disk_shader_cache(struct pipe_screen *pscreen)
834 {
835 return pan_screen(pscreen)->disk_cache;
836 }
837
/* pipe_screen::get_screen_fd hook: return the file descriptor of the
 * underlying device. */
static int
panfrost_get_screen_fd(struct pipe_screen *pscreen)
{
   struct panfrost_device *device = pan_device(pscreen);

   return panfrost_device_fd(device);
}
843
844 int
panfrost_get_driver_query_info(struct pipe_screen * pscreen,unsigned index,struct pipe_driver_query_info * info)845 panfrost_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
846 struct pipe_driver_query_info *info)
847 {
848 int num_queries = ARRAY_SIZE(panfrost_driver_query_list);
849
850 if (!info)
851 return num_queries;
852
853 if (index >= num_queries)
854 return 0;
855
856 *info = panfrost_driver_query_list[index];
857
858 return 1;
859 }
860
861 static uint64_t
panfrost_get_timestamp(struct pipe_screen * pscreen)862 panfrost_get_timestamp(struct pipe_screen *pscreen)
863 {
864 struct panfrost_device *dev = pan_device(pscreen);
865
866 return pan_gpu_time_to_ns(dev, pan_kmod_query_timestamp(dev->kmod.dev));
867 }
868
869 struct pipe_screen *
panfrost_create_screen(int fd,const struct pipe_screen_config * config,struct renderonly * ro)870 panfrost_create_screen(int fd, const struct pipe_screen_config *config,
871 struct renderonly *ro)
872 {
873 /* Create the screen */
874 struct panfrost_screen *screen = rzalloc(NULL, struct panfrost_screen);
875
876 if (!screen)
877 return NULL;
878
879 struct panfrost_device *dev = pan_device(&screen->base);
880
881 driParseConfigFiles(config->options, config->options_info, 0,
882 "panfrost", NULL, NULL, NULL, 0, NULL, 0);
883
884 /* Debug must be set first for pandecode to work correctly */
885 dev->debug =
886 debug_get_flags_option("PAN_MESA_DEBUG", panfrost_debug_options, 0);
887 screen->max_afbc_packing_ratio = debug_get_num_option(
888 "PAN_MAX_AFBC_PACKING_RATIO", DEFAULT_MAX_AFBC_PACKING_RATIO);
889
890 if (panfrost_open_device(screen, fd, dev)) {
891 ralloc_free(screen);
892 return NULL;
893 }
894
895 if (dev->debug & PAN_DBG_NO_AFBC)
896 dev->has_afbc = false;
897
898 /* Bail early on unsupported hardware */
899 if (dev->model == NULL) {
900 debug_printf("panfrost: Unsupported model %X",
901 panfrost_device_gpu_id(dev));
902 panfrost_destroy_screen(&(screen->base));
903 return NULL;
904 }
905
906 screen->force_afbc_packing = dev->debug & PAN_DBG_FORCE_PACK;
907 if (!screen->force_afbc_packing)
908 screen->force_afbc_packing = driQueryOptionb(config->options,
909 "pan_force_afbc_packing");
910
911 const char *option = debug_get_option("PAN_AFRC_RATE", NULL);
912 if (!option) {
913 screen->force_afrc_rate = -1;
914 } else if (strcmp(option, "default") == 0) {
915 screen->force_afrc_rate = PIPE_COMPRESSION_FIXED_RATE_DEFAULT;
916 } else {
917 int64_t rate =
918 debug_parse_num_option(option, PIPE_COMPRESSION_FIXED_RATE_NONE);
919 screen->force_afrc_rate = rate;
920 }
921
922 screen->csf_tiler_heap.chunk_size = driQueryOptioni(config->options,
923 "pan_csf_chunk_size");
924 screen->csf_tiler_heap.initial_chunks = driQueryOptioni(config->options,
925 "pan_csf_initial_chunks");
926 screen->csf_tiler_heap.max_chunks = driQueryOptioni(config->options,
927 "pan_csf_max_chunks");
928
929 dev->ro = ro;
930
931 screen->base.destroy = panfrost_destroy_screen;
932
933 screen->base.get_screen_fd = panfrost_get_screen_fd;
934 screen->base.get_name = panfrost_get_name;
935 screen->base.get_vendor = panfrost_get_vendor;
936 screen->base.get_device_vendor = panfrost_get_device_vendor;
937 screen->base.get_driver_query_info = panfrost_get_driver_query_info;
938 screen->base.get_shader_param = panfrost_get_shader_param;
939 screen->base.get_compute_param = panfrost_get_compute_param;
940 screen->base.get_timestamp = panfrost_get_timestamp;
941 screen->base.is_format_supported = panfrost_is_format_supported;
942 screen->base.query_dmabuf_modifiers = panfrost_query_dmabuf_modifiers;
943 screen->base.is_dmabuf_modifier_supported =
944 panfrost_is_dmabuf_modifier_supported;
945 screen->base.context_create = panfrost_create_context;
946 screen->base.get_compiler_options = panfrost_screen_get_compiler_options;
947 screen->base.get_disk_shader_cache = panfrost_get_disk_shader_cache;
948 screen->base.fence_reference = panfrost_fence_reference;
949 screen->base.fence_finish = panfrost_fence_finish;
950 screen->base.fence_get_fd = panfrost_fence_get_fd;
951 screen->base.set_damage_region = panfrost_resource_set_damage_region;
952 screen->base.query_compression_rates = panfrost_query_compression_rates;
953 screen->base.query_compression_modifiers =
954 panfrost_query_compression_modifiers;
955
956 panfrost_resource_screen_init(&screen->base);
957 pan_blend_shader_cache_init(&dev->blend_shaders,
958 panfrost_device_gpu_id(dev));
959
960 panfrost_init_screen_caps(screen);
961
962 panfrost_disk_cache_init(screen);
963
964 if (panfrost_pool_init(&screen->mempools.bin, NULL, dev, PAN_BO_EXECUTE,
965 4096, "Preload shaders", false, true) ||
966 panfrost_pool_init(&screen->mempools.desc, NULL, dev, 0, 65536,
967 "Preload RSDs", false, true)) {
968 panfrost_destroy_screen(&(screen->base));
969 return NULL;
970 }
971
972 if (dev->arch == 4)
973 panfrost_cmdstream_screen_init_v4(screen);
974 else if (dev->arch == 5)
975 panfrost_cmdstream_screen_init_v5(screen);
976 else if (dev->arch == 6)
977 panfrost_cmdstream_screen_init_v6(screen);
978 else if (dev->arch == 7)
979 panfrost_cmdstream_screen_init_v7(screen);
980 else if (dev->arch == 9)
981 panfrost_cmdstream_screen_init_v9(screen);
982 else if (dev->arch == 10)
983 panfrost_cmdstream_screen_init_v10(screen);
984 else
985 unreachable("Unhandled architecture major");
986
987 return &screen->base;
988 }
989