• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2021 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  *   Boris Brezillon <boris.brezillon@collabora.com>
26  */
27 
28 #include "util/macros.h"
29 
30 #include "genxml/gen_macros.h"
31 
32 #include "pan_desc.h"
33 #include "pan_encoder.h"
34 #include "pan_props.h"
35 #include "pan_texture.h"
36 
37 static unsigned
mod_to_block_fmt(uint64_t mod)38 mod_to_block_fmt(uint64_t mod)
39 {
40    switch (mod) {
41    case DRM_FORMAT_MOD_LINEAR:
42       return MALI_BLOCK_FORMAT_LINEAR;
43    case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED:
44       return MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
45    default:
46 #if PAN_ARCH >= 5
47       if (drm_is_afbc(mod) && !(mod & AFBC_FORMAT_MOD_TILED))
48          return MALI_BLOCK_FORMAT_AFBC;
49 #endif
50 #if PAN_ARCH >= 7
51       if (drm_is_afbc(mod) && (mod & AFBC_FORMAT_MOD_TILED))
52          return MALI_BLOCK_FORMAT_AFBC_TILED;
53 #endif
54 #if PAN_ARCH >= 10
55       if (drm_is_afrc(mod))
56          return 0; /* Reserved field for AFRC state */
57 #endif
58 
59       unreachable("Unsupported modifer");
60    }
61 }
62 
63 static enum mali_msaa
mali_sampling_mode(const struct pan_image_view * view)64 mali_sampling_mode(const struct pan_image_view *view)
65 {
66    unsigned nr_samples = pan_image_view_get_nr_samples(view);
67 
68    if (nr_samples > 1) {
69       ASSERTED const struct pan_image *first_plane =
70          pan_image_view_get_first_plane(view);
71 
72       assert(view->nr_samples == nr_samples);
73       assert(first_plane->layout.slices[0].surface_stride != 0);
74       return MALI_MSAA_LAYERED;
75    }
76 
77    if (view->nr_samples > nr_samples) {
78       assert(nr_samples == 1);
79       return MALI_MSAA_AVERAGE;
80    }
81 
82    assert(view->nr_samples == nr_samples);
83    assert(view->nr_samples == 1);
84 
85    return MALI_MSAA_SINGLE;
86 }
87 
88 static bool
renderblock_fits_in_single_pass(const struct pan_image_view * view,unsigned tile_size)89 renderblock_fits_in_single_pass(const struct pan_image_view *view,
90                                 unsigned tile_size)
91 {
92    const struct pan_image *first_plane = pan_image_view_get_first_plane(view);
93    uint64_t mod = first_plane->layout.modifier;
94 
95    if (!drm_is_afbc(mod))
96       return tile_size >= 16 * 16;
97 
98    struct pan_block_size renderblk_sz = panfrost_afbc_renderblock_size(mod);
99    return tile_size >= renderblk_sz.width * renderblk_sz.height;
100 }
101 
102 int
GENX(pan_select_crc_rt)103 GENX(pan_select_crc_rt)(const struct pan_fb_info *fb, unsigned tile_size)
104 {
105    /* Disable CRC when the tile size is smaller than 16x16. In the hardware,
106     * CRC tiles are the same size as the tiles of the framebuffer. However,
107     * our code only handles 16x16 tiles. Therefore under the current
108     * implementation, we must disable CRC when 16x16 tiles are not used.
109     *
110     * This may hurt performance. However, smaller tile sizes are rare, and
111     * CRCs are more expensive at smaller tile sizes, reducing the benefit.
112     * Restricting CRC to 16x16 should work in practice.
113     */
114    if (tile_size < 16 * 16)
115       return -1;
116 
117 #if PAN_ARCH <= 6
118    if (fb->rt_count == 1 && fb->rts[0].view && !fb->rts[0].discard &&
119        pan_image_view_has_crc(fb->rts[0].view))
120       return 0;
121 
122    return -1;
123 #else
124    bool best_rt_valid = false;
125    int best_rt = -1;
126 
127    for (unsigned i = 0; i < fb->rt_count; i++) {
128       if (!fb->rts[i].view || fb->rts[i].discard ||
129           !pan_image_view_has_crc(fb->rts[i].view))
130          continue;
131 
132       if (!renderblock_fits_in_single_pass(fb->rts[i].view, tile_size))
133          continue;
134 
135       bool valid = *(fb->rts[i].crc_valid);
136       bool full = !fb->extent.minx && !fb->extent.miny &&
137                   fb->extent.maxx == (fb->width - 1) &&
138                   fb->extent.maxy == (fb->height - 1);
139       if (!full && !valid)
140          continue;
141 
142       if (best_rt < 0 || (valid && !best_rt_valid)) {
143          best_rt = i;
144          best_rt_valid = valid;
145       }
146 
147       if (valid)
148          break;
149    }
150 
151    return best_rt;
152 #endif
153 }
154 
155 static enum mali_zs_format
translate_zs_format(enum pipe_format in)156 translate_zs_format(enum pipe_format in)
157 {
158    switch (in) {
159    case PIPE_FORMAT_Z16_UNORM:
160       return MALI_ZS_FORMAT_D16;
161    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
162       return MALI_ZS_FORMAT_D24S8;
163    case PIPE_FORMAT_Z24X8_UNORM:
164       return MALI_ZS_FORMAT_D24X8;
165    case PIPE_FORMAT_Z32_FLOAT:
166       return MALI_ZS_FORMAT_D32;
167 #if PAN_ARCH <= 7
168    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
169       return MALI_ZS_FORMAT_D32_S8X24;
170 #endif
171    default:
172       unreachable("Unsupported depth/stencil format.");
173    }
174 }
175 
176 #if PAN_ARCH >= 5
177 static enum mali_s_format
translate_s_format(enum pipe_format in)178 translate_s_format(enum pipe_format in)
179 {
180    switch (in) {
181    case PIPE_FORMAT_S8_UINT:
182       return MALI_S_FORMAT_S8;
183    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
184    case PIPE_FORMAT_X24S8_UINT:
185       return MALI_S_FORMAT_X24S8;
186 
187 #if PAN_ARCH <= 7
188    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
189    case PIPE_FORMAT_S8X24_UINT:
190       return MALI_S_FORMAT_S8X24;
191    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
192    case PIPE_FORMAT_X32_S8X24_UINT:
193       return MALI_S_FORMAT_X32_S8X24;
194 #endif
195 
196    default:
197       unreachable("Unsupported stencil format.");
198    }
199 }
200 
201 static void
pan_prepare_s(const struct pan_fb_info * fb,unsigned layer_idx,struct MALI_ZS_CRC_EXTENSION * ext)202 pan_prepare_s(const struct pan_fb_info *fb, unsigned layer_idx,
203               struct MALI_ZS_CRC_EXTENSION *ext)
204 {
205    const struct pan_image_view *s = fb->zs.view.s;
206 
207    if (!s)
208       return;
209 
210    const struct pan_image *image = pan_image_view_get_s_plane(s);
211    unsigned level = s->first_level;
212 
213    ext->s_msaa = mali_sampling_mode(s);
214 
215    struct pan_surface surf;
216    pan_iview_get_surface(s, 0, layer_idx, 0, &surf);
217 
218    assert(image->layout.modifier ==
219              DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
220           image->layout.modifier == DRM_FORMAT_MOD_LINEAR);
221    ext->s_writeback_base = surf.data;
222    ext->s_writeback_row_stride = image->layout.slices[level].row_stride;
223    ext->s_writeback_surface_stride =
224       (pan_image_view_get_nr_samples(s) > 1)
225          ? image->layout.slices[level].surface_stride
226          : 0;
227    ext->s_block_format = mod_to_block_fmt(image->layout.modifier);
228    ext->s_write_format = translate_s_format(s->format);
229 }
230 
231 static void
pan_prepare_zs(const struct pan_fb_info * fb,unsigned layer_idx,struct MALI_ZS_CRC_EXTENSION * ext)232 pan_prepare_zs(const struct pan_fb_info *fb, unsigned layer_idx,
233                struct MALI_ZS_CRC_EXTENSION *ext)
234 {
235    const struct pan_image_view *zs = fb->zs.view.zs;
236 
237    if (!zs)
238       return;
239 
240    const struct pan_image *image = pan_image_view_get_zs_plane(zs);
241    unsigned level = zs->first_level;
242 
243    ext->zs_msaa = mali_sampling_mode(zs);
244 
245    struct pan_surface surf;
246    pan_iview_get_surface(zs, 0, layer_idx, 0, &surf);
247    UNUSED const struct pan_image_slice_layout *slice =
248       &image->layout.slices[level];
249 
250    if (drm_is_afbc(image->layout.modifier)) {
251 #if PAN_ARCH >= 9
252       ext->zs_writeback_base = surf.afbc.header;
253       ext->zs_writeback_row_stride = slice->row_stride;
254       /* TODO: surface stride? */
255       ext->zs_afbc_body_offset = surf.afbc.body - surf.afbc.header;
256 
257       /* TODO: stencil AFBC? */
258 #else
259 #if PAN_ARCH >= 6
260       ext->zs_afbc_row_stride =
261          pan_afbc_stride_blocks(image->layout.modifier, slice->row_stride);
262 #else
263       ext->zs_block_format = MALI_BLOCK_FORMAT_AFBC;
264       ext->zs_afbc_body_size = 0x1000;
265       ext->zs_afbc_chunk_size = 9;
266       ext->zs_afbc_sparse = true;
267 #endif
268 
269       ext->zs_afbc_header = surf.afbc.header;
270       ext->zs_afbc_body = surf.afbc.body;
271 #endif
272    } else {
273       assert(image->layout.modifier ==
274                 DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
275              image->layout.modifier == DRM_FORMAT_MOD_LINEAR);
276 
277       /* TODO: Z32F(S8) support, which is always linear */
278 
279       ext->zs_writeback_base = surf.data;
280       ext->zs_writeback_row_stride = image->layout.slices[level].row_stride;
281       ext->zs_writeback_surface_stride =
282          (pan_image_view_get_nr_samples(zs) > 1)
283             ? image->layout.slices[level].surface_stride
284             : 0;
285    }
286 
287    ext->zs_block_format = mod_to_block_fmt(image->layout.modifier);
288    ext->zs_write_format = translate_zs_format(zs->format);
289    if (ext->zs_write_format == MALI_ZS_FORMAT_D24S8)
290       ext->s_writeback_base = ext->zs_writeback_base;
291 }
292 
293 static void
pan_prepare_crc(const struct pan_fb_info * fb,int rt_crc,struct MALI_ZS_CRC_EXTENSION * ext)294 pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc,
295                 struct MALI_ZS_CRC_EXTENSION *ext)
296 {
297    if (rt_crc < 0)
298       return;
299 
300    assert(rt_crc < fb->rt_count);
301 
302    const struct pan_image_view *rt = fb->rts[rt_crc].view;
303    const struct pan_image *image = pan_image_view_get_color_plane(rt);
304    const struct pan_image_slice_layout *slice =
305       &image->layout.slices[rt->first_level];
306 
307    ext->crc_base =
308       image->data.base + image->data.offset + slice->crc.offset;
309    ext->crc_row_stride = slice->crc.stride;
310 
311 #if PAN_ARCH >= 7
312    ext->crc_render_target = rt_crc;
313 
314    if (fb->rts[rt_crc].clear) {
315       uint32_t clear_val = fb->rts[rt_crc].clear_value[0];
316       ext->crc_clear_color = clear_val | 0xc000000000000000 |
317                              (((uint64_t)clear_val & 0xffff) << 32);
318    }
319 #endif
320 }
321 
322 static void
pan_emit_zs_crc_ext(const struct pan_fb_info * fb,unsigned layer_idx,int rt_crc,struct mali_zs_crc_extension_packed * zs_crc_ext)323 pan_emit_zs_crc_ext(const struct pan_fb_info *fb, unsigned layer_idx,
324                     int rt_crc, struct mali_zs_crc_extension_packed *zs_crc_ext)
325 {
326    pan_pack(zs_crc_ext, ZS_CRC_EXTENSION, cfg) {
327       pan_prepare_crc(fb, rt_crc, &cfg);
328       cfg.zs_clean_pixel_write_enable = fb->zs.clear.z || fb->zs.clear.s;
329       pan_prepare_zs(fb, layer_idx, &cfg);
330       pan_prepare_s(fb, layer_idx, &cfg);
331    }
332 }
333 
334 /* Measure format as it appears in the tile buffer */
335 
336 static unsigned
pan_bytes_per_pixel_tib(enum pipe_format format)337 pan_bytes_per_pixel_tib(enum pipe_format format)
338 {
339    const struct pan_blendable_format *bf =
340      GENX(panfrost_blendable_format_from_pipe_format)(format);
341 
342    if (bf->internal) {
343       /* Blendable formats are always 32-bits in the tile buffer,
344        * extra bits are used as padding or to dither */
345       return 4;
346    } else {
347       /* Non-blendable formats are raw, rounded up to the nearest
348        * power-of-two size */
349       unsigned bytes = util_format_get_blocksize(format);
350       return util_next_power_of_two(bytes);
351    }
352 }
353 
354 static unsigned
pan_cbuf_bytes_per_pixel(const struct pan_fb_info * fb)355 pan_cbuf_bytes_per_pixel(const struct pan_fb_info *fb)
356 {
357    unsigned sum = 0;
358 
359    for (int cb = 0; cb < fb->rt_count; ++cb) {
360       const struct pan_image_view *rt = fb->rts[cb].view;
361 
362       if (!rt)
363          continue;
364 
365       sum += pan_bytes_per_pixel_tib(rt->format) * rt->nr_samples;
366    }
367 
368    return sum;
369 }
370 
371 /*
372  * Select the largest tile size that fits within the tilebuffer budget.
373  * Formally, maximize (pixels per tile) such that it is a power of two and
374  *
375  *      (bytes per pixel) (pixels per tile) <= (max bytes per tile)
376  *
377  * A bit of algebra gives the following formula.
378  *
379  * Calculate the color buffer allocation size as well.
380  */
381 void
GENX(pan_select_tile_size)382 GENX(pan_select_tile_size)(struct pan_fb_info *fb)
383 {
384    unsigned bytes_per_pixel;
385 
386    assert(util_is_power_of_two_nonzero(fb->tile_buf_budget));
387    assert(fb->tile_buf_budget >= 1024);
388 
389    bytes_per_pixel = pan_cbuf_bytes_per_pixel(fb);
390    fb->tile_size = fb->tile_buf_budget >> util_logbase2_ceil(bytes_per_pixel);
391 
392    /* Clamp tile size to hardware limits */
393    fb->tile_size =
394       MIN2(fb->tile_size, panfrost_max_effective_tile_size(PAN_ARCH));
395    assert(fb->tile_size >= 4 * 4);
396 
397    /* Colour buffer allocations must be 1K aligned. */
398    fb->cbuf_allocation = ALIGN_POT(bytes_per_pixel * fb->tile_size, 1024);
399    assert(fb->cbuf_allocation <= fb->tile_buf_budget && "tile too big");
400 }
401 
402 static enum mali_color_format
pan_mfbd_raw_format(unsigned bits)403 pan_mfbd_raw_format(unsigned bits)
404 {
405    /* clang-format off */
406    switch (bits) {
407    case    8: return MALI_COLOR_FORMAT_RAW8;
408    case   16: return MALI_COLOR_FORMAT_RAW16;
409    case   24: return MALI_COLOR_FORMAT_RAW24;
410    case   32: return MALI_COLOR_FORMAT_RAW32;
411    case   48: return MALI_COLOR_FORMAT_RAW48;
412    case   64: return MALI_COLOR_FORMAT_RAW64;
413    case   96: return MALI_COLOR_FORMAT_RAW96;
414    case  128: return MALI_COLOR_FORMAT_RAW128;
415    case  192: return MALI_COLOR_FORMAT_RAW192;
416    case  256: return MALI_COLOR_FORMAT_RAW256;
417    case  384: return MALI_COLOR_FORMAT_RAW384;
418    case  512: return MALI_COLOR_FORMAT_RAW512;
419    case  768: return MALI_COLOR_FORMAT_RAW768;
420    case 1024: return MALI_COLOR_FORMAT_RAW1024;
421    case 1536: return MALI_COLOR_FORMAT_RAW1536;
422    case 2048: return MALI_COLOR_FORMAT_RAW2048;
423    default: unreachable("invalid raw bpp");
424    }
425    /* clang-format on */
426 }
427 
428 static void
pan_rt_init_format(const struct pan_image_view * rt,struct MALI_RENDER_TARGET * cfg)429 pan_rt_init_format(const struct pan_image_view *rt,
430                    struct MALI_RENDER_TARGET *cfg)
431 {
432    /* Explode details on the format */
433 
434    const struct util_format_description *desc =
435       util_format_description(rt->format);
436 
437    /* The swizzle for rendering is inverted from texturing */
438 
439    unsigned char swizzle[4] = {
440       PIPE_SWIZZLE_X,
441       PIPE_SWIZZLE_Y,
442       PIPE_SWIZZLE_Z,
443       PIPE_SWIZZLE_W,
444    };
445 
446    /* Fill in accordingly, defaulting to 8-bit UNORM */
447 
448    if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
449       cfg->srgb = true;
450 
451    struct pan_blendable_format fmt =
452       *GENX(panfrost_blendable_format_from_pipe_format)(rt->format);
453    enum mali_color_format writeback_format;
454 
455    if (fmt.internal) {
456       cfg->internal_format = fmt.internal;
457       writeback_format = fmt.writeback;
458       panfrost_invert_swizzle(desc->swizzle, swizzle);
459    } else {
460       /* Construct RAW internal/writeback, where internal is
461        * specified logarithmically (round to next power-of-two).
462        * Offset specified from RAW8, where 8 = 2^3 */
463 
464       unsigned bits = desc->block.bits;
465       assert(bits >= 8 && bits <= 128);
466       unsigned offset = util_logbase2_ceil(bits) - 3;
467       assert(offset <= 4);
468 
469       cfg->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW8 + offset;
470       writeback_format = pan_mfbd_raw_format(bits);
471    }
472 
473 #if PAN_ARCH >= 10
474    const struct pan_image *image = pan_image_view_get_color_plane(rt);
475 
476    if (drm_is_afrc(image->layout.modifier))
477       cfg->afrc.writeback_format = writeback_format;
478    else
479       cfg->writeback_format = writeback_format;
480 #else
481    cfg->writeback_format = writeback_format;
482 #endif
483 
484    cfg->swizzle = panfrost_translate_swizzle_4(swizzle);
485 }
486 
487 static void
pan_prepare_rt(const struct pan_fb_info * fb,unsigned layer_idx,unsigned rt_idx,unsigned cbuf_offset,struct MALI_RENDER_TARGET * cfg)488 pan_prepare_rt(const struct pan_fb_info *fb, unsigned layer_idx,
489                unsigned rt_idx, unsigned cbuf_offset,
490                struct MALI_RENDER_TARGET *cfg)
491 {
492    cfg->clean_pixel_write_enable = fb->rts[rt_idx].clear;
493    cfg->internal_buffer_offset = cbuf_offset;
494    if (fb->rts[rt_idx].clear) {
495       cfg->clear.color_0 = fb->rts[rt_idx].clear_value[0];
496       cfg->clear.color_1 = fb->rts[rt_idx].clear_value[1];
497       cfg->clear.color_2 = fb->rts[rt_idx].clear_value[2];
498       cfg->clear.color_3 = fb->rts[rt_idx].clear_value[3];
499    }
500 
501    const struct pan_image_view *rt = fb->rts[rt_idx].view;
502    if (!rt || fb->rts[rt_idx].discard) {
503       cfg->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8;
504       cfg->internal_buffer_offset = cbuf_offset;
505 #if PAN_ARCH >= 7
506       cfg->writeback_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
507       cfg->dithering_enable = true;
508 #endif
509       return;
510    }
511 
512    const struct pan_image *image = pan_image_view_get_color_plane(rt);
513 
514    if (!drm_is_afrc(image->layout.modifier))
515       cfg->write_enable = true;
516 
517    cfg->dithering_enable = true;
518 
519    const struct pan_image *first_plane = pan_image_view_get_first_plane(rt);
520    unsigned level = rt->first_level;
521    ASSERTED unsigned layer_count = rt->dim == MALI_TEXTURE_DIMENSION_3D
522                                       ? first_plane->layout.depth
523                                       : rt->last_layer - rt->first_layer + 1;
524 
525    assert(rt->last_level == rt->first_level);
526    assert(layer_idx < layer_count);
527 
528    int row_stride = image->layout.slices[level].row_stride;
529 
530    /* Only set layer_stride for layered MSAA rendering  */
531 
532    unsigned layer_stride = (pan_image_view_get_nr_samples(rt) > 1)
533                               ? image->layout.slices[level].surface_stride
534                               : 0;
535 
536    cfg->writeback_msaa = mali_sampling_mode(rt);
537 
538    pan_rt_init_format(rt, cfg);
539 
540    cfg->writeback_block_format = mod_to_block_fmt(image->layout.modifier);
541 
542    struct pan_surface surf;
543    pan_iview_get_surface(rt, 0, layer_idx, 0, &surf);
544 
545    if (drm_is_afbc(image->layout.modifier)) {
546 #if PAN_ARCH >= 9
547       if (image->layout.modifier & AFBC_FORMAT_MOD_YTR)
548          cfg->afbc.yuv_transform = true;
549 
550       cfg->afbc.wide_block = panfrost_afbc_is_wide(image->layout.modifier);
551       cfg->afbc.split_block =
552          (image->layout.modifier & AFBC_FORMAT_MOD_SPLIT);
553       cfg->afbc.header = surf.afbc.header;
554       cfg->afbc.body_offset = surf.afbc.body - surf.afbc.header;
555       assert(surf.afbc.body >= surf.afbc.header);
556 
557       cfg->afbc.compression_mode = GENX(pan_afbc_compression_mode)(rt->format);
558       cfg->afbc.row_stride = row_stride;
559 #else
560       const struct pan_image_slice_layout *slice = &image->layout.slices[level];
561 
562 #if PAN_ARCH >= 6
563       cfg->afbc.row_stride =
564          pan_afbc_stride_blocks(image->layout.modifier, slice->row_stride);
565       cfg->afbc.afbc_wide_block_enable =
566          panfrost_afbc_is_wide(image->layout.modifier);
567       cfg->afbc.afbc_split_block_enable =
568          (image->layout.modifier & AFBC_FORMAT_MOD_SPLIT);
569 #else
570       cfg->afbc.chunk_size = 9;
571       cfg->afbc.sparse = true;
572       cfg->afbc.body_size = slice->afbc.body_size;
573 #endif
574 
575       cfg->afbc.header = surf.afbc.header;
576       cfg->afbc.body = surf.afbc.body;
577 
578       if (image->layout.modifier & AFBC_FORMAT_MOD_YTR)
579          cfg->afbc.yuv_transform_enable = true;
580 #endif
581 #if PAN_ARCH >= 10
582    } else if (drm_is_afrc(image->layout.modifier)) {
583       struct pan_afrc_format_info finfo =
584          panfrost_afrc_get_format_info(image->layout.format);
585 
586       cfg->writeback_mode = MALI_WRITEBACK_MODE_AFRC_RGB;
587       cfg->afrc.block_size =
588          GENX(pan_afrc_block_size)(image->layout.modifier, 0);
589       cfg->afrc.format =
590          GENX(pan_afrc_format)(finfo, image->layout.modifier, 0);
591 
592       cfg->rgb.base = surf.data;
593       cfg->rgb.row_stride = row_stride;
594       cfg->rgb.surface_stride = layer_stride;
595 #endif
596    } else {
597       assert(image->layout.modifier == DRM_FORMAT_MOD_LINEAR ||
598              image->layout.modifier ==
599                 DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED);
600       cfg->rgb.base = surf.data;
601       cfg->rgb.row_stride = row_stride;
602       cfg->rgb.surface_stride = layer_stride;
603    }
604 }
605 #endif
606 
607 void
GENX(pan_emit_tls)608 GENX(pan_emit_tls)(const struct pan_tls_info *info,
609                    struct mali_local_storage_packed *out)
610 {
611    pan_pack(out, LOCAL_STORAGE, cfg) {
612       if (info->tls.size) {
613          unsigned shift = panfrost_get_stack_shift(info->tls.size);
614 
615          cfg.tls_size = shift;
616 #if PAN_ARCH >= 9
617          /* For now, always use packed TLS addressing. This is
618           * better for the cache and requires no fix up code in
619           * the shader. We may need to revisit this someday for
620           * OpenCL generic pointer support.
621           */
622          cfg.tls_address_mode = MALI_ADDRESS_MODE_PACKED;
623 
624          assert((info->tls.ptr & 4095) == 0);
625          cfg.tls_base_pointer = info->tls.ptr >> 8;
626 #else
627          cfg.tls_base_pointer = info->tls.ptr;
628 #endif
629       }
630 
631       if (info->wls.size) {
632          assert(!(info->wls.ptr & 4095));
633          assert((info->wls.ptr & 0xffffffff00000000ULL) ==
634                 ((info->wls.ptr + info->wls.size - 1) & 0xffffffff00000000ULL));
635          cfg.wls_base_pointer = info->wls.ptr;
636          unsigned wls_size = pan_wls_adjust_size(info->wls.size);
637          cfg.wls_instances = info->wls.instances;
638          cfg.wls_size_scale = util_logbase2(wls_size) + 1;
639       } else {
640          cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
641       }
642    }
643 }
644 
645 #if PAN_ARCH <= 5
646 static void
pan_emit_midgard_tiler(const struct pan_fb_info * fb,const struct pan_tiler_context * tiler_ctx,struct mali_tiler_context_packed * out)647 pan_emit_midgard_tiler(const struct pan_fb_info *fb,
648                        const struct pan_tiler_context *tiler_ctx,
649                        struct mali_tiler_context_packed *out)
650 {
651    bool hierarchy = !tiler_ctx->midgard.no_hierarchical_tiling;
652 
653    assert(tiler_ctx->midgard.polygon_list);
654 
655    pan_pack(out, TILER_CONTEXT, cfg) {
656       unsigned header_size;
657 
658       if (tiler_ctx->midgard.disable) {
659          cfg.hierarchy_mask =
660             hierarchy ? MALI_MIDGARD_TILER_DISABLED : MALI_MIDGARD_TILER_USER;
661          header_size = MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE;
662          cfg.polygon_list_size = header_size + (hierarchy ? 0 : 4);
663          cfg.heap_start = tiler_ctx->midgard.polygon_list;
664          cfg.heap_end = tiler_ctx->midgard.polygon_list;
665       } else {
666          cfg.hierarchy_mask = panfrost_choose_hierarchy_mask(
667             fb->width, fb->height, tiler_ctx->midgard.vertex_count, hierarchy);
668          header_size = panfrost_tiler_header_size(
669             fb->width, fb->height, cfg.hierarchy_mask, hierarchy);
670          cfg.polygon_list_size = panfrost_tiler_full_size(
671             fb->width, fb->height, cfg.hierarchy_mask, hierarchy);
672          cfg.heap_start = tiler_ctx->midgard.heap.start;
673          cfg.heap_end = cfg.heap_start + tiler_ctx->midgard.heap.size;
674       }
675 
676       cfg.polygon_list = tiler_ctx->midgard.polygon_list;
677       cfg.polygon_list_body = cfg.polygon_list + header_size;
678    }
679 }
680 #endif
681 
682 #if PAN_ARCH >= 5
683 static void
pan_emit_rt(const struct pan_fb_info * fb,unsigned layer_idx,unsigned idx,unsigned cbuf_offset,struct mali_render_target_packed * out)684 pan_emit_rt(const struct pan_fb_info *fb, unsigned layer_idx, unsigned idx,
685             unsigned cbuf_offset, struct mali_render_target_packed *out)
686 {
687    pan_pack(out, RENDER_TARGET, cfg) {
688       pan_prepare_rt(fb, layer_idx, idx, cbuf_offset, &cfg);
689    }
690 }
691 
692 #if PAN_ARCH >= 6
693 /* All Bifrost and Valhall GPUs are affected by issue TSIX-2033:
694  *
695  *      Forcing clean_tile_writes breaks INTERSECT readbacks
696  *
697  * To workaround, use the frame shader mode ALWAYS instead of INTERSECT if
698  * clean tile writes is forced. Since INTERSECT is a hint that the hardware may
699  * ignore, this cannot affect correctness, only performance */
700 
701 static enum mali_pre_post_frame_shader_mode
pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode,bool force_clean_tile)702 pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode,
703                           bool force_clean_tile)
704 {
705    if (force_clean_tile && mode == MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT)
706       return MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS;
707    else
708       return mode;
709 }
710 
711 /* Regardless of clean_tile_write_enable, the hardware writes clean tiles if
712  * the effective tile size differs from the superblock size of any enabled AFBC
713  * render target. Check this condition. */
714 
715 static bool
pan_force_clean_write_on(const struct pan_image * image,unsigned tile_size)716 pan_force_clean_write_on(const struct pan_image *image, unsigned tile_size)
717 {
718    if (!image)
719       return false;
720 
721    if (!drm_is_afbc(image->layout.modifier))
722       return false;
723 
724    struct pan_block_size renderblk_sz =
725       panfrost_afbc_renderblock_size(image->layout.modifier);
726 
727    assert(renderblk_sz.width >= 16 && renderblk_sz.height >= 16);
728    assert(tile_size <= panfrost_max_effective_tile_size(PAN_ARCH));
729 
730    return tile_size != renderblk_sz.width * renderblk_sz.height;
731 }
732 
733 static bool
pan_force_clean_write(const struct pan_fb_info * fb,unsigned tile_size)734 pan_force_clean_write(const struct pan_fb_info *fb, unsigned tile_size)
735 {
736    /* Maximum tile size */
737    assert(tile_size <= panfrost_max_effective_tile_size(PAN_ARCH));
738 
739    for (unsigned i = 0; i < fb->rt_count; ++i) {
740       if (!fb->rts[i].view || fb->rts[i].discard)
741          continue;
742 
743       const struct pan_image *img =
744          pan_image_view_get_color_plane(fb->rts[i].view);
745 
746       if (pan_force_clean_write_on(img, tile_size))
747          return true;
748    }
749 
750    if (fb->zs.view.zs && !fb->zs.discard.z &&
751        pan_force_clean_write_on(pan_image_view_get_zs_plane(fb->zs.view.zs),
752                                 tile_size))
753       return true;
754 
755    if (fb->zs.view.s && !fb->zs.discard.s &&
756        pan_force_clean_write_on(pan_image_view_get_s_plane(fb->zs.view.s),
757                                 tile_size))
758       return true;
759 
760    return false;
761 }
762 
763 #endif
764 
765 unsigned
GENX(pan_emit_fbd)766 GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
767                    const struct pan_tls_info *tls,
768                    const struct pan_tiler_context *tiler_ctx, void *out)
769 {
770    void *fbd = out;
771    void *rtd = out + pan_size(FRAMEBUFFER);
772 
773 #if PAN_ARCH <= 5
774    GENX(pan_emit_tls)(tls, pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE));
775 #endif
776 
777    int crc_rt = GENX(pan_select_crc_rt)(fb, fb->tile_size);
778    bool has_zs_crc_ext = (fb->zs.view.zs || fb->zs.view.s || crc_rt >= 0);
779 
780    pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
781 #if PAN_ARCH >= 6
782       bool force_clean_write = pan_force_clean_write(fb, fb->tile_size);
783 
784       cfg.sample_locations = fb->sample_positions;
785       cfg.pre_frame_0 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[0],
786                                                   force_clean_write);
787       cfg.pre_frame_1 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[1],
788                                                   force_clean_write);
789       cfg.post_frame = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[2],
790                                                  force_clean_write);
791 #if PAN_ARCH <= 7
792       /* On Bifrost, the layer_id is passed through a push_uniform, which forces
793        * us to have one pre/post DCD array per layer. */
794       cfg.frame_shader_dcds =
795          fb->bifrost.pre_post.dcds.gpu + (layer_idx * 3 * pan_size(DRAW));
796 #else
797       /* On Valhall, layer_id is passed through the framebuffer frame_arg, which
798        * is preloaded in r62, so we can use the same pre/post DCD array for all
799        * layers. */
800       cfg.frame_shader_dcds = fb->bifrost.pre_post.dcds.gpu;
801 #endif
802       cfg.tiler =
803          PAN_ARCH >= 9 ? tiler_ctx->valhall.desc : tiler_ctx->bifrost.desc;
804 #endif
805       cfg.width = fb->width;
806       cfg.height = fb->height;
807       cfg.bound_max_x = fb->width - 1;
808       cfg.bound_max_y = fb->height - 1;
809 
810       cfg.effective_tile_size = fb->tile_size;
811       cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
812       cfg.render_target_count = MAX2(fb->rt_count, 1);
813 
814       /* Default to 24 bit depth if there's no surface. */
815       cfg.z_internal_format =
816          fb->zs.view.zs ? panfrost_get_z_internal_format(fb->zs.view.zs->format)
817                         : MALI_Z_INTERNAL_FORMAT_D24;
818 
819       cfg.z_clear = fb->zs.clear_value.depth;
820       cfg.s_clear = fb->zs.clear_value.stencil;
821       cfg.color_buffer_allocation = fb->cbuf_allocation;
822 
823       /* The force_samples setting dictates the sample-count that is used
824        * for rasterization, and works like D3D11's ForcedSampleCount feature:
825        *
826        * - If force_samples == 0: Let nr_samples dictate sample count
827        * - If force_samples == 1: force single-sampled rasterization
828        * - If force_samples > 1: force multi-sampled rasterization
829        *
830        * This can be used to read SYSTEM_VALUE_SAMPLE_MASK_IN from the
831        * fragment shader, even when performing single-sampled rendering.
832        */
833       if (!fb->force_samples) {
834          cfg.sample_count = fb->nr_samples;
835          cfg.sample_pattern = pan_sample_pattern(fb->nr_samples);
836       } else if (fb->force_samples == 1) {
837          cfg.sample_count = fb->nr_samples;
838          cfg.sample_pattern = pan_sample_pattern(1);
839       } else {
840          cfg.sample_count = 1;
841          cfg.sample_pattern = pan_sample_pattern(fb->force_samples);
842       }
843 
844       cfg.z_write_enable = (fb->zs.view.zs && !fb->zs.discard.z);
845       cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s);
846       cfg.has_zs_crc_extension = has_zs_crc_ext;
847 
848       if (crc_rt >= 0) {
849          bool *valid = fb->rts[crc_rt].crc_valid;
850          bool full = !fb->extent.minx && !fb->extent.miny &&
851                      fb->extent.maxx == (fb->width - 1) &&
852                      fb->extent.maxy == (fb->height - 1);
853          bool clean_tile_write = fb->rts[crc_rt].clear;
854 
855 #if PAN_ARCH >= 6
856          clean_tile_write |= pan_force_clean_write_on(
857             pan_image_view_get_color_plane(fb->rts[crc_rt].view),
858             fb->tile_size);
859 #endif
860 
861          /* If the CRC was valid it stays valid, if it wasn't, we must ensure
862           * the render operation covers the full frame, and clean tiles are
863           * pushed to memory. */
864          bool new_valid = *valid | (full && clean_tile_write);
865 
866          cfg.crc_read_enable = *valid;
867 
868          /* If the data is currently invalid, still write CRC
869           * data if we are doing a full write, so that it is
870           * valid for next time. */
871          cfg.crc_write_enable = new_valid;
872 
873          *valid = new_valid;
874       }
875 
876 #if PAN_ARCH >= 9
877       cfg.point_sprite_coord_origin_max_y = fb->sprite_coord_origin;
878       cfg.first_provoking_vertex = fb->first_provoking_vertex;
879 
880       /* internal_layer_index is used to select the right primitive list in the
881        * tiler context, and frame_arg is the value that's passed to the fragment
882        * shader through r62-r63, which we use to pass gl_Layer. Since the
883        * layer_idx only takes 8-bits, we might use the extra 56-bits we have
884        * in frame_argument to pass other information to the fragment shader at
885        * some point. */
886       assert(layer_idx >= tiler_ctx->valhall.layer_offset);
887       cfg.internal_layer_index = layer_idx - tiler_ctx->valhall.layer_offset;
888       cfg.frame_argument = layer_idx;
889 #endif
890    }
891 
892 #if PAN_ARCH >= 6
893    pan_section_pack(fbd, FRAMEBUFFER, PADDING, padding)
894       ;
895 #else
896    pan_emit_midgard_tiler(fb, tiler_ctx,
897                           pan_section_ptr(fbd, FRAMEBUFFER, TILER));
898 
899    /* All weights set to 0, nothing to do here */
900    pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w)
901       ;
902 #endif
903 
904    if (has_zs_crc_ext) {
905       struct mali_zs_crc_extension_packed *zs_crc_ext =
906          out + pan_size(FRAMEBUFFER);
907 
908       pan_emit_zs_crc_ext(fb, layer_idx, crc_rt, zs_crc_ext);
909       rtd += pan_size(ZS_CRC_EXTENSION);
910    }
911 
912    unsigned rt_count = MAX2(fb->rt_count, 1);
913    unsigned cbuf_offset = 0;
914    for (unsigned i = 0; i < rt_count; i++) {
915       pan_emit_rt(fb, layer_idx, i, cbuf_offset, rtd);
916       rtd += pan_size(RENDER_TARGET);
917       if (!fb->rts[i].view)
918          continue;
919 
920       cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) *
921                      fb->tile_size * pan_image_view_get_nr_samples(fb->rts[i].view);
922 
923       if (i != crc_rt)
924          *(fb->rts[i].crc_valid) = false;
925    }
926 
927    struct mali_framebuffer_pointer_packed tag;
928    pan_pack(&tag, FRAMEBUFFER_POINTER, cfg) {
929       cfg.zs_crc_extension_present = has_zs_crc_ext;
930       cfg.render_target_count = MAX2(fb->rt_count, 1);
931    }
932    return tag.opaque[0];
933 }
934 #else /* PAN_ARCH == 4 */
935 static enum mali_color_format
pan_sfbd_raw_format(unsigned bits)936 pan_sfbd_raw_format(unsigned bits)
937 {
938    /* clang-format off */
939    switch (bits) {
940    case   16: return MALI_COLOR_FORMAT_1_16B_CHANNEL;
941    case   32: return MALI_COLOR_FORMAT_1_32B_CHANNEL;
942    case   48: return MALI_COLOR_FORMAT_3_16B_CHANNELS;
943    case   64: return MALI_COLOR_FORMAT_2_32B_CHANNELS;
944    case   96: return MALI_COLOR_FORMAT_3_32B_CHANNELS;
945    case  128: return MALI_COLOR_FORMAT_4_32B_CHANNELS;
946    default: unreachable("invalid raw bpp");
947    }
948    /* clang-format on */
949 }
950 
951 void
GENX(pan_select_tile_size)952 GENX(pan_select_tile_size)(struct pan_fb_info *fb)
953 {
954    /* Tile size and color buffer allocation are not configurable on gen 4 */
955    fb->tile_size = 16 * 16;
956 }
957 
958 unsigned
GENX(pan_emit_fbd)959 GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
960                    const struct pan_tls_info *tls,
961                    const struct pan_tiler_context *tiler_ctx, void *fbd)
962 {
963    assert(fb->rt_count <= 1);
964 
965    GENX(pan_emit_tls)(tls, pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE));
966    pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
967       cfg.bound_max_x = fb->width - 1;
968       cfg.bound_max_y = fb->height - 1;
969       cfg.dithering_enable = true;
970       cfg.clean_pixel_write_enable = true;
971       cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
972       if (fb->rts[0].clear) {
973          cfg.clear_color_0 = fb->rts[0].clear_value[0];
974          cfg.clear_color_1 = fb->rts[0].clear_value[1];
975          cfg.clear_color_2 = fb->rts[0].clear_value[2];
976          cfg.clear_color_3 = fb->rts[0].clear_value[3];
977       }
978 
979       if (fb->zs.clear.z)
980          cfg.z_clear = fb->zs.clear_value.depth;
981 
982       if (fb->zs.clear.s)
983          cfg.s_clear = fb->zs.clear_value.stencil;
984 
985       if (fb->rt_count && fb->rts[0].view) {
986          const struct pan_image_view *rt = fb->rts[0].view;
987          const struct pan_image *image = pan_image_view_get_color_plane(rt);
988 
989          const struct util_format_description *desc =
990             util_format_description(rt->format);
991 
992          /* The swizzle for rendering is inverted from texturing */
993          unsigned char swizzle[4];
994          panfrost_invert_swizzle(desc->swizzle, swizzle);
995          cfg.swizzle = panfrost_translate_swizzle_4(swizzle);
996 
997          struct pan_blendable_format fmt =
998             *GENX(panfrost_blendable_format_from_pipe_format)(rt->format);
999 
1000          if (fmt.internal) {
1001             cfg.internal_format = fmt.internal;
1002             cfg.color_writeback_format = fmt.writeback;
1003          } else {
1004             /* Construct RAW internal/writeback */
1005             unsigned bits = desc->block.bits;
1006 
1007             cfg.internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW_VALUE;
1008             cfg.color_writeback_format = pan_sfbd_raw_format(bits);
1009          }
1010 
1011          unsigned level = rt->first_level;
1012          struct pan_surface surf;
1013 
1014          pan_iview_get_surface(rt, 0, 0, 0, &surf);
1015 
1016          cfg.color_write_enable = !fb->rts[0].discard;
1017          cfg.color_writeback.base = surf.data;
1018          cfg.color_writeback.row_stride =
1019             image->layout.slices[level].row_stride;
1020 
1021          cfg.color_block_format = mod_to_block_fmt(image->layout.modifier);
1022          assert(cfg.color_block_format == MALI_BLOCK_FORMAT_LINEAR ||
1023                 cfg.color_block_format ==
1024                    MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED);
1025 
1026          if (pan_image_view_has_crc(rt)) {
1027             const struct pan_image_slice_layout *slice =
1028                &image->layout.slices[level];
1029 
1030             cfg.crc_buffer.row_stride = slice->crc.stride;
1031             cfg.crc_buffer.base =
1032                image->data.base + image->data.offset + slice->crc.offset;
1033          }
1034       }
1035 
1036       if (fb->zs.view.zs) {
1037          const struct pan_image_view *zs = fb->zs.view.zs;
1038          const struct pan_image *image = pan_image_view_get_zs_plane(zs);
1039          unsigned level = zs->first_level;
1040          struct pan_surface surf;
1041 
1042          pan_iview_get_surface(zs, 0, 0, 0, &surf);
1043 
1044          cfg.zs_write_enable = !fb->zs.discard.z;
1045          cfg.zs_writeback.base = surf.data;
1046          cfg.zs_writeback.row_stride = image->layout.slices[level].row_stride;
1047          cfg.zs_block_format = mod_to_block_fmt(image->layout.modifier);
1048          assert(cfg.zs_block_format == MALI_BLOCK_FORMAT_LINEAR ||
1049                 cfg.zs_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED);
1050 
1051          cfg.zs_format = translate_zs_format(zs->format);
1052       }
1053 
1054       cfg.sample_count = fb->nr_samples;
1055 
1056       if (fb->rt_count)
1057          cfg.msaa = mali_sampling_mode(fb->rts[0].view);
1058    }
1059 
1060    pan_emit_midgard_tiler(fb, tiler_ctx,
1061                           pan_section_ptr(fbd, FRAMEBUFFER, TILER));
1062 
1063    /* All weights set to 0, nothing to do here */
1064    pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w)
1065       ;
1066 
1067    pan_section_pack(fbd, FRAMEBUFFER, PADDING_1, padding)
1068       ;
1069    pan_section_pack(fbd, FRAMEBUFFER, PADDING_2, padding)
1070       ;
1071    return 0;
1072 }
1073 #endif
1074 
1075 #if PAN_ARCH <= 9
1076 void
GENX(pan_emit_fragment_job_payload)1077 GENX(pan_emit_fragment_job_payload)(const struct pan_fb_info *fb, uint64_t fbd,
1078                                     void *out)
1079 {
1080    pan_section_pack(out, FRAGMENT_JOB, PAYLOAD, payload) {
1081       payload.bound_min_x = fb->extent.minx >> MALI_TILE_SHIFT;
1082       payload.bound_min_y = fb->extent.miny >> MALI_TILE_SHIFT;
1083       payload.bound_max_x = fb->extent.maxx >> MALI_TILE_SHIFT;
1084       payload.bound_max_y = fb->extent.maxy >> MALI_TILE_SHIFT;
1085       payload.framebuffer = fbd;
1086 
1087 #if PAN_ARCH >= 5
1088       if (fb->tile_map.base) {
1089          payload.has_tile_enable_map = true;
1090          payload.tile_enable_map = fb->tile_map.base;
1091          payload.tile_enable_map_row_stride = fb->tile_map.stride;
1092       }
1093 #endif
1094    }
1095 }
1096 #endif
1097