• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 VMware, Inc.
3  * Copyright (C) 2014 Broadcom
4  * Copyright (C) 2018 Alyssa Rosenzweig
5  * Copyright (C) 2019 Collabora, Ltd.
6  * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25  * SOFTWARE.
26  *
27  */
28 
29 #include "util/u_debug.h"
30 #include "util/u_memory.h"
31 #include "util/format/u_format.h"
32 #include "util/format/u_format_s3tc.h"
33 #include "util/u_video.h"
34 #include "util/u_screen.h"
35 #include "util/os_time.h"
36 #include "util/u_process.h"
37 #include "pipe/p_defines.h"
38 #include "pipe/p_screen.h"
39 #include "draw/draw_context.h"
40 
41 #include <fcntl.h>
42 
43 #include "drm-uapi/drm_fourcc.h"
44 #include "drm-uapi/panfrost_drm.h"
45 
46 #include "pan_bo.h"
47 #include "pan_shader.h"
48 #include "pan_screen.h"
49 #include "pan_resource.h"
50 #include "pan_public.h"
51 #include "pan_util.h"
52 #include "decode.h"
53 
54 #include "pan_context.h"
55 
56 static const struct debug_named_value panfrost_debug_options[] = {
57         {"perf",      PAN_DBG_PERF,     "Enable performance warnings"},
58         {"trace",     PAN_DBG_TRACE,    "Trace the command stream"},
59         {"deqp",      PAN_DBG_DEQP,     "Hacks for dEQP"},
60         {"dirty",     PAN_DBG_DIRTY,    "Always re-emit all state"},
61         {"sync",      PAN_DBG_SYNC,     "Wait for each job's completion and abort on GPU faults"},
62         {"precompile", PAN_DBG_PRECOMPILE, "Precompile shaders for shader-db"},
63         {"nofp16",     PAN_DBG_NOFP16,     "Disable 16-bit support"},
64         {"gl3",       PAN_DBG_GL3,      "Enable experimental GL 3.x implementation, up to 3.3"},
65         {"noafbc",    PAN_DBG_NO_AFBC,  "Disable AFBC support"},
66         {"nocrc",     PAN_DBG_NO_CRC,   "Disable transaction elimination"},
67         {"msaa16",    PAN_DBG_MSAA16,   "Enable MSAA 8x and 16x support"},
68         {"indirect",  PAN_DBG_INDIRECT, "Use experimental compute kernel for indirect draws"},
69         {"linear",    PAN_DBG_LINEAR,   "Force linear textures"},
70         {"nocache",   PAN_DBG_NO_CACHE, "Disable BO cache"},
71         {"dump",      PAN_DBG_DUMP,     "Dump all graphics memory"},
72 #ifdef PAN_DBG_OVERFLOW
73         {"overflow",  PAN_DBG_OVERFLOW, "Check for buffer overflows in pool uploads"},
74 #endif
75         DEBUG_NAMED_VALUE_END
76 };
77 
78 static const char *
panfrost_get_name(struct pipe_screen * screen)79 panfrost_get_name(struct pipe_screen *screen)
80 {
81         return pan_device(screen)->model->name;
82 }
83 
/* Vendor query: always the fixed string "Panfrost"; the screen argument
 * is not consulted. */
static const char *
panfrost_get_vendor(struct pipe_screen *screen)
{
        static const char driver_vendor[] = "Panfrost";

        return driver_vendor;
}
89 
/* Device vendor query: always the fixed string "Arm"; the screen
 * argument is not consulted. */
static const char *
panfrost_get_device_vendor(struct pipe_screen *screen)
{
        static const char hw_vendor[] = "Arm";

        return hw_vendor;
}
95 
96 static int
panfrost_get_param(struct pipe_screen * screen,enum pipe_cap param)97 panfrost_get_param(struct pipe_screen *screen, enum pipe_cap param)
98 {
99         struct panfrost_device *dev = pan_device(screen);
100 
101         /* Our GL 3.x implementation is WIP */
102         bool is_gl3 = dev->debug & (PAN_DBG_GL3 | PAN_DBG_DEQP);
103 
104         /* Native MRT is introduced with v5 */
105         bool has_mrt = (dev->arch >= 5);
106 
107         /* Only kernel drivers >= 1.1 can allocate HEAP BOs */
108         bool has_heap = dev->kernel_version->version_major > 1 ||
109                         dev->kernel_version->version_minor >= 1;
110 
111         switch (param) {
112         case PIPE_CAP_NPOT_TEXTURES:
113         case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
114         case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
115         case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
116         case PIPE_CAP_POINT_SPRITE:
117         case PIPE_CAP_DEPTH_CLIP_DISABLE:
118         case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE:
119         case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
120         case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
121         case PIPE_CAP_FRONTEND_NOOP:
122         case PIPE_CAP_SAMPLE_SHADING:
123         case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
124         case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
125         case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
126         case PIPE_CAP_SHADER_PACK_HALF_FLOAT:
127         case PIPE_CAP_NATIVE_FENCE_FD:
128                 return 1;
129 
130         case PIPE_CAP_MAX_RENDER_TARGETS:
131         case PIPE_CAP_FBFETCH:
132         case PIPE_CAP_FBFETCH_COHERENT:
133                 return has_mrt ? 8 : 1;
134 
135         case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
136                 return 1;
137 
138         case PIPE_CAP_OCCLUSION_QUERY:
139         case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
140                 return true;
141 
142         case PIPE_CAP_ANISOTROPIC_FILTER:
143                 return dev->revision >= dev->model->min_rev_anisotropic;
144 
145         /* Compile side is done for Bifrost, Midgard TODO. Needs some kernel
146          * work to turn on, since CYCLE_COUNT_START needs to be issued. In
147          * kbase, userspace requests this via BASE_JD_REQ_PERMON. There is not
148          * yet way to request this with mainline TODO */
149         case PIPE_CAP_SHADER_CLOCK:
150                 return 0;
151 
152         case PIPE_CAP_VS_INSTANCEID:
153         case PIPE_CAP_TEXTURE_MULTISAMPLE:
154         case PIPE_CAP_SURFACE_SAMPLE_COUNT:
155                 return true;
156 
157         case PIPE_CAP_SAMPLER_VIEW_TARGET:
158         case PIPE_CAP_TEXTURE_SWIZZLE:
159         case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
160         case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
161         case PIPE_CAP_BLEND_EQUATION_SEPARATE:
162         case PIPE_CAP_INDEP_BLEND_ENABLE:
163         case PIPE_CAP_INDEP_BLEND_FUNC:
164         case PIPE_CAP_GENERATE_MIPMAP:
165         case PIPE_CAP_ACCELERATED:
166         case PIPE_CAP_UMA:
167         case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
168         case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
169         case PIPE_CAP_SHADER_ARRAY_COMPONENTS:
170         case PIPE_CAP_CS_DERIVED_SYSTEM_VALUES_SUPPORTED:
171         case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
172         case PIPE_CAP_TEXTURE_BUFFER_SAMPLER:
173         case PIPE_CAP_PACKED_UNIFORMS:
174         case PIPE_CAP_IMAGE_LOAD_FORMATTED:
175         case PIPE_CAP_CUBE_MAP_ARRAY:
176         case PIPE_CAP_COMPUTE:
177                 return 1;
178 
179         /* We need this for OES_copy_image, but currently there are some awful
180          * interactions with AFBC that need to be worked out. */
181         case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
182                 return 0;
183 
184         case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
185                 return PIPE_MAX_SO_BUFFERS;
186 
187         case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
188         case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
189                 return PIPE_MAX_SO_OUTPUTS;
190 
191         case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
192         case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
193                 return 1;
194 
195         case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
196                 return 256;
197 
198         case PIPE_CAP_GLSL_FEATURE_LEVEL:
199         case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
200                 return is_gl3 ? 330 : 140;
201         case PIPE_CAP_ESSL_FEATURE_LEVEL:
202                 return dev->arch >= 6 ? 320 : 310;
203 
204         case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
205                 return 16;
206 
207         case PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT:
208                 return 65536;
209 
210         /* Must be at least 64 for correct behaviour */
211         case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
212                 return 64;
213 
214         case PIPE_CAP_QUERY_TIMESTAMP:
215                 return is_gl3;
216 
217         /* TODO: Where does this req come from in practice? */
218         case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
219                 return 1;
220 
221         case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
222                 return 1 << (MAX_MIP_LEVELS - 1);
223 
224         case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
225         case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
226                 return MAX_MIP_LEVELS;
227 
228         case PIPE_CAP_FS_COORD_ORIGIN_LOWER_LEFT:
229         case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER:
230                 /* Hardware is upper left. Pixel center at (0.5, 0.5) */
231                 return 0;
232 
233         case PIPE_CAP_FS_COORD_ORIGIN_UPPER_LEFT:
234         case PIPE_CAP_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
235         case PIPE_CAP_TGSI_TEXCOORD:
236                 return 1;
237 
238         /* We would prefer varyings on Midgard, but proper sysvals on Bifrost */
239         case PIPE_CAP_FS_FACE_IS_INTEGER_SYSVAL:
240         case PIPE_CAP_FS_POSITION_IS_SYSVAL:
241         case PIPE_CAP_FS_POINT_IS_SYSVAL:
242                 return dev->arch >= 6;
243 
244         case PIPE_CAP_SEAMLESS_CUBE_MAP:
245         case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
246                 return true;
247 
248         case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET:
249                 return 0xffff;
250 
251         case PIPE_CAP_TEXTURE_TRANSFER_MODES:
252                 return 0;
253 
254         case PIPE_CAP_ENDIANNESS:
255                 return PIPE_ENDIAN_NATIVE;
256 
257         case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
258                 return 4;
259 
260         case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
261                 return -8;
262 
263         case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
264                 return 7;
265 
266         case PIPE_CAP_VIDEO_MEMORY: {
267                 uint64_t system_memory;
268 
269                 if (!os_get_total_physical_memory(&system_memory))
270                         return 0;
271 
272                 return (int)(system_memory >> 20);
273         }
274 
275         case PIPE_CAP_SHADER_STENCIL_EXPORT:
276         case PIPE_CAP_CONDITIONAL_RENDER:
277         case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
278                 return true;
279 
280         case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
281                 return 4;
282 
283         case PIPE_CAP_MAX_VARYINGS:
284                 /* Return the GLSL maximum. The internal maximum
285                  * PAN_MAX_VARYINGS accommodates internal varyings. */
286                 return MAX_VARYING;
287 
288         /* Removed in v6 (Bifrost) */
289         case PIPE_CAP_GL_CLAMP:
290         case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
291         case PIPE_CAP_ALPHA_TEST:
292                 return dev->arch <= 5;
293 
294         /* Removed in v9 (Valhall). PRIMTIIVE_RESTART_FIXED_INDEX is of course
295          * still supported as it is core GLES3.0 functionality
296          */
297         case PIPE_CAP_PRIMITIVE_RESTART:
298                 return dev->arch <= 7;
299 
300         case PIPE_CAP_FLATSHADE:
301         case PIPE_CAP_TWO_SIDED_COLOR:
302         case PIPE_CAP_CLIP_PLANES:
303                 return 0;
304 
305         case PIPE_CAP_PACKED_STREAM_OUTPUT:
306                 return 0;
307 
308         case PIPE_CAP_VIEWPORT_TRANSFORM_LOWERED:
309         case PIPE_CAP_PSIZ_CLAMPED:
310                 return 1;
311 
312         case PIPE_CAP_NIR_IMAGES_AS_DEREF:
313                 return 0;
314 
315         case PIPE_CAP_DRAW_INDIRECT:
316                 return has_heap;
317 
318         case PIPE_CAP_START_INSTANCE:
319         case PIPE_CAP_DRAW_PARAMETERS:
320                 return pan_is_bifrost(dev);
321 
322         case PIPE_CAP_SUPPORTED_PRIM_MODES:
323         case PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART: {
324                 /* Mali supports GLES and QUADS. Midgard and v6 Bifrost
325                  * support more */
326                 uint32_t modes = BITFIELD_MASK(PIPE_PRIM_QUADS + 1);
327 
328                 if (dev->arch <= 6) {
329                         modes |= BITFIELD_BIT(PIPE_PRIM_QUAD_STRIP);
330                         modes |= BITFIELD_BIT(PIPE_PRIM_POLYGON);
331                 }
332 
333                 if (dev->arch >= 9) {
334                         /* Although Valhall is supposed to support quads, they
335                          * don't seem to work correctly. Disable to fix
336                          * arb-provoking-vertex-render.
337                          */
338                         modes &= ~BITFIELD_BIT(PIPE_PRIM_QUADS);
339                 }
340 
341                 return modes;
342         }
343 
344         case PIPE_CAP_IMAGE_STORE_FORMATTED:
345                 return 1;
346 
347         default:
348                 return u_pipe_screen_get_param_defaults(screen, param);
349         }
350 }
351 
352 static int
panfrost_get_shader_param(struct pipe_screen * screen,enum pipe_shader_type shader,enum pipe_shader_cap param)353 panfrost_get_shader_param(struct pipe_screen *screen,
354                           enum pipe_shader_type shader,
355                           enum pipe_shader_cap param)
356 {
357         struct panfrost_device *dev = pan_device(screen);
358         bool is_nofp16 = dev->debug & PAN_DBG_NOFP16;
359         bool is_deqp = dev->debug & PAN_DBG_DEQP;
360 
361         switch (shader) {
362         case PIPE_SHADER_VERTEX:
363         case PIPE_SHADER_FRAGMENT:
364         case PIPE_SHADER_COMPUTE:
365                 break;
366         default:
367                 return 0;
368         }
369 
370         /* We only allow observable side effects (memory writes) in compute and
371          * fragment shaders. Side effects in the geometry pipeline cause
372          * trouble with IDVS.
373          *
374          * This restriction doesn't apply to Midgard, which does not implement
375          * IDVS and therefore executes vertex shaders exactly once.
376          */
377         bool allow_side_effects = (shader != PIPE_SHADER_VERTEX) ||
378                                   (dev->arch <= 5);
379 
380         switch (param) {
381         case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
382         case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
383         case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
384         case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
385                 return 16384; /* arbitrary */
386 
387         case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
388                 return 1024; /* arbitrary */
389 
390         case PIPE_SHADER_CAP_MAX_INPUTS:
391                 /* Used as ABI on Midgard */
392                 return 16;
393 
394         case PIPE_SHADER_CAP_MAX_OUTPUTS:
395                 return shader == PIPE_SHADER_FRAGMENT ? 8 : PIPE_MAX_ATTRIBS;
396 
397         case PIPE_SHADER_CAP_MAX_TEMPS:
398                 return 256; /* arbitrary */
399 
400         case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE:
401                 return 16 * 1024 * sizeof(float);
402 
403         case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
404                 STATIC_ASSERT(PAN_MAX_CONST_BUFFERS < 0x100);
405                 return PAN_MAX_CONST_BUFFERS;
406 
407         case PIPE_SHADER_CAP_CONT_SUPPORTED:
408                 return 0;
409 
410         case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
411                 return 1;
412         case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
413                 return 0;
414 
415         case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
416                 return dev->arch >= 6;
417 
418         case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
419                 return 1;
420 
421         case PIPE_SHADER_CAP_SUBROUTINES:
422                 return 0;
423 
424         case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
425                 return 0;
426 
427         case PIPE_SHADER_CAP_INTEGERS:
428                 return 1;
429 
430         /* The Bifrost compiler supports full 16-bit. Midgard could but int16
431          * support is untested, so restrict INT16 to Bifrost. Midgard
432          * architecturally cannot support fp16 derivatives. */
433 
434         case PIPE_SHADER_CAP_FP16:
435         case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
436                 return !is_nofp16;
437         case PIPE_SHADER_CAP_FP16_DERIVATIVES:
438         case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
439                 return dev->arch >= 6 && !is_nofp16;
440         case PIPE_SHADER_CAP_INT16:
441                 /* XXX: Advertise this CAP when a proper fix to lower_precision
442                  * lands. GLSL IR validation failure in glmark2 -bterrain */
443                 return dev->arch >= 6 && !is_nofp16 && is_deqp;
444 
445         case PIPE_SHADER_CAP_INT64_ATOMICS:
446         case PIPE_SHADER_CAP_DROUND_SUPPORTED:
447         case PIPE_SHADER_CAP_DFRACEXP_DLDEXP_SUPPORTED:
448         case PIPE_SHADER_CAP_LDEXP_SUPPORTED:
449         case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
450                 return 0;
451 
452         case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
453                 STATIC_ASSERT(PIPE_MAX_SAMPLERS < 0x10000);
454                 return PIPE_MAX_SAMPLERS;
455 
456         case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
457                 STATIC_ASSERT(PIPE_MAX_SHADER_SAMPLER_VIEWS < 0x10000);
458                 return PIPE_MAX_SHADER_SAMPLER_VIEWS;
459 
460         case PIPE_SHADER_CAP_PREFERRED_IR:
461                 return PIPE_SHADER_IR_NIR;
462 
463         case PIPE_SHADER_CAP_SUPPORTED_IRS:
464                 return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_NIR_SERIALIZED);
465 
466         case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
467                 return allow_side_effects ? 16 : 0;
468 
469         case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
470                 return allow_side_effects ? PIPE_MAX_SHADER_IMAGES : 0;
471 
472         case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
473         case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
474                 return 0;
475 
476         default:
477                 return 0;
478         }
479 
480         return 0;
481 }
482 
483 static float
panfrost_get_paramf(struct pipe_screen * screen,enum pipe_capf param)484 panfrost_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
485 {
486         switch (param) {
487         case PIPE_CAPF_MIN_LINE_WIDTH:
488         case PIPE_CAPF_MIN_LINE_WIDTH_AA:
489         case PIPE_CAPF_MIN_POINT_SIZE:
490         case PIPE_CAPF_MIN_POINT_SIZE_AA:
491            return 1;
492 
493         case PIPE_CAPF_POINT_SIZE_GRANULARITY:
494         case PIPE_CAPF_LINE_WIDTH_GRANULARITY:
495            return 0.0625;
496 
497         case PIPE_CAPF_MAX_LINE_WIDTH:
498         case PIPE_CAPF_MAX_LINE_WIDTH_AA:
499         case PIPE_CAPF_MAX_POINT_SIZE:
500         case PIPE_CAPF_MAX_POINT_SIZE_AA:
501                 return 4095.9375;
502 
503         case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
504                 return 16.0;
505 
506         case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
507                 return 16.0; /* arbitrary */
508 
509         case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
510         case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
511         case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
512                 return 0.0f;
513 
514         default:
515                 debug_printf("Unexpected PIPE_CAPF %d query\n", param);
516                 return 0.0;
517         }
518 }
519 
520 /**
521  * Query format support for creating a texture, drawing surface, etc.
522  * \param format  the format to test
523  * \param type  one of PIPE_TEXTURE, PIPE_SURFACE
524  */
525 static bool
panfrost_is_format_supported(struct pipe_screen * screen,enum pipe_format format,enum pipe_texture_target target,unsigned sample_count,unsigned storage_sample_count,unsigned bind)526 panfrost_is_format_supported( struct pipe_screen *screen,
527                               enum pipe_format format,
528                               enum pipe_texture_target target,
529                               unsigned sample_count,
530                               unsigned storage_sample_count,
531                               unsigned bind)
532 {
533         struct panfrost_device *dev = pan_device(screen);
534         const struct util_format_description *format_desc;
535 
536         assert(target == PIPE_BUFFER ||
537                target == PIPE_TEXTURE_1D ||
538                target == PIPE_TEXTURE_1D_ARRAY ||
539                target == PIPE_TEXTURE_2D ||
540                target == PIPE_TEXTURE_2D_ARRAY ||
541                target == PIPE_TEXTURE_RECT ||
542                target == PIPE_TEXTURE_3D ||
543                target == PIPE_TEXTURE_CUBE ||
544                target == PIPE_TEXTURE_CUBE_ARRAY);
545 
546         format_desc = util_format_description(format);
547 
548         /* MSAA 2x gets rounded up to 4x. MSAA 8x/16x only supported on v5+.
549          * TODO: debug MSAA 8x/16x */
550 
551         switch (sample_count) {
552         case 0:
553         case 1:
554         case 4:
555                 break;
556         case 8:
557         case 16:
558                 if (dev->debug & PAN_DBG_MSAA16)
559                         break;
560                 else
561                         return false;
562         default:
563                 return false;
564         }
565 
566         if (MAX2(sample_count, 1) != MAX2(storage_sample_count, 1))
567                 return false;
568 
569         /* Z16 causes dEQP failures on t720 */
570         if (format == PIPE_FORMAT_Z16_UNORM && dev->arch <= 4)
571                 return false;
572 
573         /* Check we support the format with the given bind */
574 
575         unsigned relevant_bind = bind &
576                 ( PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_RENDER_TARGET
577                 | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_SAMPLER_VIEW);
578 
579         struct panfrost_format fmt = dev->formats[format];
580 
581         /* Also check that compressed texture formats are supported on this
582          * particular chip. They may not be depending on system integration
583          * differences. RGTC can be emulated so is always supported. */
584 
585         bool is_rgtc = format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC;
586         bool supported = panfrost_supports_compressed_format(dev,
587                         MALI_EXTRACT_INDEX(fmt.hw));
588 
589         if (!is_rgtc && !supported)
590                 return false;
591 
592         return MALI_EXTRACT_INDEX(fmt.hw) && ((relevant_bind & ~fmt.bind) == 0);
593 }
594 
595 /* We always support linear and tiled operations, both external and internal.
596  * We support AFBC for a subset of formats, and colourspace transform for a
597  * subset of those. */
598 
599 static void
panfrost_walk_dmabuf_modifiers(struct pipe_screen * screen,enum pipe_format format,int max,uint64_t * modifiers,unsigned int * external_only,int * out_count,uint64_t test_modifier)600 panfrost_walk_dmabuf_modifiers(struct pipe_screen *screen,
601                 enum pipe_format format, int max, uint64_t *modifiers, unsigned
602                 int *external_only, int *out_count, uint64_t test_modifier)
603 {
604         /* Query AFBC status */
605         struct panfrost_device *dev = pan_device(screen);
606         bool afbc = dev->has_afbc && panfrost_format_supports_afbc(dev, format);
607         bool ytr = panfrost_afbc_can_ytr(format);
608         bool tiled_afbc = panfrost_afbc_can_tile(dev);
609 
610         unsigned count = 0;
611 
612         for (unsigned i = 0; i < PAN_MODIFIER_COUNT; ++i) {
613                 if (drm_is_afbc(pan_best_modifiers[i]) && !afbc)
614                         continue;
615 
616                 if ((pan_best_modifiers[i] & AFBC_FORMAT_MOD_YTR) && !ytr)
617                         continue;
618 
619                 if ((pan_best_modifiers[i] & AFBC_FORMAT_MOD_TILED) && !tiled_afbc)
620                         continue;
621 
622                 if (test_modifier != DRM_FORMAT_MOD_INVALID &&
623                     test_modifier != pan_best_modifiers[i])
624                         continue;
625 
626                 count++;
627 
628                 if (max > (int) count) {
629                         modifiers[count] = pan_best_modifiers[i];
630 
631                         if (external_only)
632                                 external_only[count] = false;
633                 }
634         }
635 
636         *out_count = count;
637 }
638 
639 static void
panfrost_query_dmabuf_modifiers(struct pipe_screen * screen,enum pipe_format format,int max,uint64_t * modifiers,unsigned int * external_only,int * out_count)640 panfrost_query_dmabuf_modifiers(struct pipe_screen *screen,
641                 enum pipe_format format, int max, uint64_t *modifiers, unsigned
642                 int *external_only, int *out_count)
643 {
644         panfrost_walk_dmabuf_modifiers(screen, format, max, modifiers,
645                 external_only, out_count, DRM_FORMAT_MOD_INVALID);
646 }
647 
648 static bool
panfrost_is_dmabuf_modifier_supported(struct pipe_screen * screen,uint64_t modifier,enum pipe_format format,bool * external_only)649 panfrost_is_dmabuf_modifier_supported(struct pipe_screen *screen,
650                 uint64_t modifier, enum pipe_format format,
651                 bool *external_only)
652 {
653         uint64_t unused;
654         unsigned int uint_extern_only = 0;
655         int count;
656 
657         panfrost_walk_dmabuf_modifiers(screen, format, 1, &unused,
658                 &uint_extern_only, &count, modifier);
659 
660         if (external_only)
661            *external_only = uint_extern_only ? true : false;
662 
663         return count > 0;
664 }
665 
666 static int
panfrost_get_compute_param(struct pipe_screen * pscreen,enum pipe_shader_ir ir_type,enum pipe_compute_cap param,void * ret)667 panfrost_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
668                 enum pipe_compute_cap param, void *ret)
669 {
670         struct panfrost_device *dev = pan_device(pscreen);
671         const char * const ir = "panfrost";
672 
673 #define RET(x) do {                  \
674    if (ret)                          \
675       memcpy(ret, x, sizeof(x));     \
676    return sizeof(x);                 \
677 } while (0)
678 
679 	switch (param) {
680 	case PIPE_COMPUTE_CAP_ADDRESS_BITS:
681 		RET((uint32_t []){ 64 });
682 
683 	case PIPE_COMPUTE_CAP_IR_TARGET:
684 		if (ret)
685 			sprintf(ret, "%s", ir);
686 		return strlen(ir) * sizeof(char);
687 
688 	case PIPE_COMPUTE_CAP_GRID_DIMENSION:
689 		RET((uint64_t []) { 3 });
690 
691 	case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
692 		RET(((uint64_t []) { 65535, 65535, 65535 }));
693 
694         case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
695                 /* Unpredictable behaviour at larger sizes. Mali-G52 advertises
696                  * 384x384x384.
697                  *
698                  * On Midgard, we don't allow more than 128 threads in each
699                  * direction to match PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK.
700                  * That still exceeds the minimum-maximum.
701                  */
702                 if (dev->arch >= 6)
703                         RET(((uint64_t []) { 256, 256, 256 }));
704                 else
705                         RET(((uint64_t []) { 128, 128, 128 }));
706 
707 	case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
708                 /* On Bifrost and newer, all GPUs can support at least 256 threads
709                  * regardless of register usage, so we report 256.
710                  *
711                  * On Midgard, with maximum register usage, the maximum
712                  * thread count is only 64. We would like to report 64 here, but
713                  * the GLES3.1 spec minimum is 128, so we report 128 and limit
714                  * the register allocation of affected compute kernels.
715                  */
716 		RET((uint64_t []) { dev->arch >= 6 ? 256 : 128 });
717 
718 	case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
719 		RET((uint64_t []) { 1024*1024*512 /* Maybe get memory */ });
720 
721 	case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
722 		RET((uint64_t []) { 32768 });
723 
724 	case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
725 	case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
726 		RET((uint64_t []) { 4096 });
727 
728 	case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
729 		RET((uint64_t []) { 1024*1024*512 /* Maybe get memory */ });
730 
731 	case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
732 		RET((uint32_t []) { 800 /* MHz -- TODO */ });
733 
734 	case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
735 		RET((uint32_t []) { dev->core_count });
736 
737 	case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
738 		RET((uint32_t []) { 1 });
739 
740 	case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
741 		RET((uint32_t []) { pan_subgroup_size(dev->arch) });
742 
743 	case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
744 		RET((uint64_t []) { 1024 }); // TODO
745 	}
746 
747 	return 0;
748 }
749 
750 static void
panfrost_destroy_screen(struct pipe_screen * pscreen)751 panfrost_destroy_screen(struct pipe_screen *pscreen)
752 {
753         struct panfrost_device *dev = pan_device(pscreen);
754         struct panfrost_screen *screen = pan_screen(pscreen);
755 
756         panfrost_resource_screen_destroy(pscreen);
757         panfrost_pool_cleanup(&screen->indirect_draw.bin_pool);
758         panfrost_pool_cleanup(&screen->blitter.bin_pool);
759         panfrost_pool_cleanup(&screen->blitter.desc_pool);
760         pan_blend_shaders_cleanup(dev);
761 
762         if (screen->vtbl.screen_destroy)
763                 screen->vtbl.screen_destroy(pscreen);
764 
765         if (dev->ro)
766                 dev->ro->destroy(dev->ro);
767         panfrost_close_device(dev);
768         ralloc_free(pscreen);
769 }
770 
/* Timestamp query: returns os_time_get_nano(); the screen argument is not
 * consulted. */
static uint64_t
panfrost_get_timestamp(struct pipe_screen *_screen)
{
        const uint64_t now = os_time_get_nano();

        return now;
}
776 
777 static void
panfrost_fence_reference(struct pipe_screen * pscreen,struct pipe_fence_handle ** ptr,struct pipe_fence_handle * fence)778 panfrost_fence_reference(struct pipe_screen *pscreen,
779                          struct pipe_fence_handle **ptr,
780                          struct pipe_fence_handle *fence)
781 {
782         struct panfrost_device *dev = pan_device(pscreen);
783         struct pipe_fence_handle *old = *ptr;
784 
785         if (pipe_reference(&old->reference, &fence->reference)) {
786                 drmSyncobjDestroy(dev->fd, old->syncobj);
787                 free(old);
788         }
789 
790         *ptr = fence;
791 }
792 
793 static int
panfrost_fence_get_fd(struct pipe_screen * _screen,struct pipe_fence_handle * fence)794 panfrost_fence_get_fd(struct pipe_screen *_screen,
795                        struct pipe_fence_handle *fence)
796 {
797         struct panfrost_device *dev = pan_device(_screen);
798         int fd = -1;
799         drmSyncobjExportSyncFile(dev->fd, fence->syncobj, &fd);
800         return fd;
801 }
802 
803 static bool
panfrost_fence_finish(struct pipe_screen * pscreen,struct pipe_context * ctx,struct pipe_fence_handle * fence,uint64_t timeout)804 panfrost_fence_finish(struct pipe_screen *pscreen,
805                       struct pipe_context *ctx,
806                       struct pipe_fence_handle *fence,
807                       uint64_t timeout)
808 {
809         struct panfrost_device *dev = pan_device(pscreen);
810         int ret;
811 
812         if (fence->signaled)
813                 return true;
814 
815         uint64_t abs_timeout = os_time_get_absolute_timeout(timeout);
816         if (abs_timeout == OS_TIMEOUT_INFINITE)
817                 abs_timeout = INT64_MAX;
818 
819         ret = drmSyncobjWait(dev->fd, &fence->syncobj,
820                              1,
821                              abs_timeout, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
822                              NULL);
823 
824         fence->signaled = (ret >= 0);
825         return fence->signaled;
826 }
827 
828 struct pipe_fence_handle *
panfrost_fence_create(struct panfrost_context * ctx)829 panfrost_fence_create(struct panfrost_context *ctx)
830 {
831         struct pipe_fence_handle *f = calloc(1, sizeof(*f));
832         if (!f)
833                 return NULL;
834 
835         struct panfrost_device *dev = pan_device(ctx->base.screen);
836         int fd = -1, ret;
837 
838         /* Snapshot the last rendering out fence. We'd rather have another
839          * syncobj instead of a sync file, but this is all we get.
840          * (HandleToFD/FDToHandle just gives you another syncobj ID for the
841          * same syncobj).
842          */
843         ret = drmSyncobjExportSyncFile(dev->fd, ctx->syncobj, &fd);
844         if (ret || fd == -1) {
845                 fprintf(stderr, "export failed\n");
846                 goto err_free_fence;
847         }
848 
849         ret = drmSyncobjCreate(dev->fd, 0, &f->syncobj);
850         if (ret) {
851                 fprintf(stderr, "create syncobj failed\n");
852                 goto err_close_fd;
853         }
854 
855         ret = drmSyncobjImportSyncFile(dev->fd, f->syncobj, fd);
856         if (ret) {
857                 fprintf(stderr, "create syncobj failed\n");
858                 goto err_destroy_syncobj;
859         }
860 
861         assert(f->syncobj != ctx->syncobj);
862         close(fd);
863         pipe_reference_init(&f->reference, 1);
864 
865         return f;
866 
867 err_destroy_syncobj:
868         drmSyncobjDestroy(dev->fd, f->syncobj);
869 err_close_fd:
870         close(fd);
871 err_free_fence:
872         free(f);
873         return NULL;
874 }
875 
876 static const void *
panfrost_screen_get_compiler_options(struct pipe_screen * pscreen,enum pipe_shader_ir ir,enum pipe_shader_type shader)877 panfrost_screen_get_compiler_options(struct pipe_screen *pscreen,
878                                      enum pipe_shader_ir ir,
879                                      enum pipe_shader_type shader)
880 {
881         return pan_screen(pscreen)->vtbl.get_compiler_options();
882 }
883 
884 struct pipe_screen *
panfrost_create_screen(int fd,struct renderonly * ro)885 panfrost_create_screen(int fd, struct renderonly *ro)
886 {
887         /* Create the screen */
888         struct panfrost_screen *screen = rzalloc(NULL, struct panfrost_screen);
889 
890         if (!screen)
891                 return NULL;
892 
893         struct panfrost_device *dev = pan_device(&screen->base);
894 
895         /* Debug must be set first for pandecode to work correctly */
896         dev->debug = debug_get_flags_option("PAN_MESA_DEBUG", panfrost_debug_options, 0);
897         panfrost_open_device(screen, fd, dev);
898 
899         if (dev->debug & PAN_DBG_NO_AFBC)
900                 dev->has_afbc = false;
901 
902         /* Bail early on unsupported hardware */
903         if (dev->model == NULL) {
904                 debug_printf("panfrost: Unsupported model %X", dev->gpu_id);
905                 panfrost_destroy_screen(&(screen->base));
906                 return NULL;
907         }
908 
909         dev->ro = ro;
910 
911         screen->base.destroy = panfrost_destroy_screen;
912 
913         screen->base.get_name = panfrost_get_name;
914         screen->base.get_vendor = panfrost_get_vendor;
915         screen->base.get_device_vendor = panfrost_get_device_vendor;
916         screen->base.get_param = panfrost_get_param;
917         screen->base.get_shader_param = panfrost_get_shader_param;
918         screen->base.get_compute_param = panfrost_get_compute_param;
919         screen->base.get_paramf = panfrost_get_paramf;
920         screen->base.get_timestamp = panfrost_get_timestamp;
921         screen->base.is_format_supported = panfrost_is_format_supported;
922         screen->base.query_dmabuf_modifiers = panfrost_query_dmabuf_modifiers;
923         screen->base.is_dmabuf_modifier_supported =
924                panfrost_is_dmabuf_modifier_supported;
925         screen->base.context_create = panfrost_create_context;
926         screen->base.get_compiler_options = panfrost_screen_get_compiler_options;
927         screen->base.fence_reference = panfrost_fence_reference;
928         screen->base.fence_finish = panfrost_fence_finish;
929         screen->base.set_damage_region = panfrost_resource_set_damage_region;
930 
931         panfrost_resource_screen_init(&screen->base);
932         pan_blend_shaders_init(dev);
933         panfrost_pool_init(&screen->indirect_draw.bin_pool, NULL, dev,
934                            PAN_BO_EXECUTE, 65536, "Indirect draw shaders",
935                            false, true);
936         panfrost_pool_init(&screen->blitter.bin_pool, NULL, dev, PAN_BO_EXECUTE,
937                            4096, "Blitter shaders", false, true);
938         panfrost_pool_init(&screen->blitter.desc_pool, NULL, dev, 0, 65536,
939                            "Blitter RSDs", false, true);
940         if (dev->arch == 4)
941                 panfrost_cmdstream_screen_init_v4(screen);
942         else if (dev->arch == 5)
943                 panfrost_cmdstream_screen_init_v5(screen);
944         else if (dev->arch == 6)
945                 panfrost_cmdstream_screen_init_v6(screen);
946         else if (dev->arch == 7)
947                 panfrost_cmdstream_screen_init_v7(screen);
948         else if (dev->arch == 9)
949                 panfrost_cmdstream_screen_init_v9(screen);
950         else
951                 unreachable("Unhandled architecture major");
952 
953         return &screen->base;
954 }
955