• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2021 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  *   Boris Brezillon <boris.brezillon@collabora.com>
26  */
27 
28 #include "util/macros.h"
29 
30 #include "genxml/gen_macros.h"
31 
32 #include "pan_desc.h"
33 #include "pan_encoder.h"
34 #include "pan_texture.h"
35 
36 static unsigned
mod_to_block_fmt(uint64_t mod)37 mod_to_block_fmt(uint64_t mod)
38 {
39    switch (mod) {
40    case DRM_FORMAT_MOD_LINEAR:
41       return MALI_BLOCK_FORMAT_LINEAR;
42    case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED:
43       return MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
44    default:
45 #if PAN_ARCH >= 5
46       if (drm_is_afbc(mod) && !(mod & AFBC_FORMAT_MOD_TILED))
47          return MALI_BLOCK_FORMAT_AFBC;
48 #endif
49 #if PAN_ARCH >= 7
50       if (drm_is_afbc(mod) && (mod & AFBC_FORMAT_MOD_TILED))
51          return MALI_BLOCK_FORMAT_AFBC_TILED;
52 #endif
53 
54       unreachable("Unsupported modifer");
55    }
56 }
57 
58 static enum mali_msaa
mali_sampling_mode(const struct pan_image_view * view)59 mali_sampling_mode(const struct pan_image_view *view)
60 {
61    unsigned nr_samples = pan_image_view_get_nr_samples(view);
62 
63    if (nr_samples > 1) {
64       assert(view->nr_samples == nr_samples);
65       assert(view->planes[0]->layout.slices[0].surface_stride != 0);
66       return MALI_MSAA_LAYERED;
67    }
68 
69    if (view->nr_samples > nr_samples) {
70       assert(nr_samples == 1);
71       return MALI_MSAA_AVERAGE;
72    }
73 
74    assert(view->nr_samples == nr_samples);
75    assert(view->nr_samples == 1);
76 
77    return MALI_MSAA_SINGLE;
78 }
79 
80 int
GENX(pan_select_crc_rt)81 GENX(pan_select_crc_rt)(const struct pan_fb_info *fb, unsigned tile_size)
82 {
83    /* Disable CRC when the tile size is not 16x16. In the hardware, CRC
84     * tiles are the same size as the tiles of the framebuffer. However,
85     * our code only handles 16x16 tiles. Therefore under the current
86     * implementation, we must disable CRC when 16x16 tiles are not used.
87     *
88     * This may hurt performance. However, smaller tile sizes are rare, and
89     * CRCs are more expensive at smaller tile sizes, reducing the benefit.
90     * Restricting CRC to 16x16 should work in practice.
91     */
92    if (tile_size != 16 * 16) {
93       assert(tile_size < 16 * 16);
94       return -1;
95    }
96 
97 #if PAN_ARCH <= 6
98    if (fb->rt_count == 1 && fb->rts[0].view && !fb->rts[0].discard &&
99        pan_image_view_has_crc(fb->rts[0].view))
100       return 0;
101 
102    return -1;
103 #else
104    bool best_rt_valid = false;
105    int best_rt = -1;
106 
107    for (unsigned i = 0; i < fb->rt_count; i++) {
108       if (!fb->rts[i].view || fb->rts[0].discard ||
109           !pan_image_view_has_crc(fb->rts[i].view))
110          continue;
111 
112       bool valid = *(fb->rts[i].crc_valid);
113       bool full = !fb->extent.minx && !fb->extent.miny &&
114                   fb->extent.maxx == (fb->width - 1) &&
115                   fb->extent.maxy == (fb->height - 1);
116       if (!full && !valid)
117          continue;
118 
119       if (best_rt < 0 || (valid && !best_rt_valid)) {
120          best_rt = i;
121          best_rt_valid = valid;
122       }
123 
124       if (valid)
125          break;
126    }
127 
128    return best_rt;
129 #endif
130 }
131 
132 static enum mali_zs_format
translate_zs_format(enum pipe_format in)133 translate_zs_format(enum pipe_format in)
134 {
135    switch (in) {
136    case PIPE_FORMAT_Z16_UNORM:
137       return MALI_ZS_FORMAT_D16;
138    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
139       return MALI_ZS_FORMAT_D24S8;
140    case PIPE_FORMAT_Z24X8_UNORM:
141       return MALI_ZS_FORMAT_D24X8;
142    case PIPE_FORMAT_Z32_FLOAT:
143       return MALI_ZS_FORMAT_D32;
144 #if PAN_ARCH <= 7
145    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
146       return MALI_ZS_FORMAT_D32_S8X24;
147 #endif
148    default:
149       unreachable("Unsupported depth/stencil format.");
150    }
151 }
152 
153 #if PAN_ARCH >= 5
154 static enum mali_s_format
translate_s_format(enum pipe_format in)155 translate_s_format(enum pipe_format in)
156 {
157    switch (in) {
158    case PIPE_FORMAT_S8_UINT:
159       return MALI_S_FORMAT_S8;
160    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
161    case PIPE_FORMAT_X24S8_UINT:
162       return MALI_S_FORMAT_X24S8;
163 
164 #if PAN_ARCH <= 7
165    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
166    case PIPE_FORMAT_S8X24_UINT:
167       return MALI_S_FORMAT_S8X24;
168    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
169       return MALI_S_FORMAT_X32_S8X24;
170 #endif
171 
172    default:
173       unreachable("Unsupported stencil format.");
174    }
175 }
176 
/*
 * Fill the separate-stencil writeback fields of a ZS/CRC extension
 * descriptor from fb->zs.view.s. No-op if no separate stencil is bound.
 */
static void
pan_prepare_s(const struct pan_fb_info *fb, struct MALI_ZS_CRC_EXTENSION *ext)
{
   const struct pan_image_view *s = fb->zs.view.s;

   if (!s)
      return;

   const struct pan_image *image = pan_image_view_get_zs_image(s);
   unsigned level = s->first_level;

   ext->s_msaa = mali_sampling_mode(s);

   struct pan_surface surf;
   pan_iview_get_surface(s, 0, 0, 0, &surf);

   /* Separate stencil only supports linear or 16x16 u-interleaved
    * layouts here (no AFBC), as asserted. */
   assert(image->layout.modifier ==
             DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
          image->layout.modifier == DRM_FORMAT_MOD_LINEAR);
   ext->s_writeback_base = surf.data;
   ext->s_writeback_row_stride = image->layout.slices[level].row_stride;
   /* Surface stride only matters for layered MSAA writeback */
   ext->s_writeback_surface_stride =
      (pan_image_view_get_nr_samples(s) > 1)
         ? image->layout.slices[level].surface_stride
         : 0;
   ext->s_block_format = mod_to_block_fmt(image->layout.modifier);
   ext->s_write_format = translate_s_format(s->format);
}
205 
/*
 * Fill the depth/stencil writeback fields of a ZS/CRC extension descriptor
 * from fb->zs.view.zs. Handles AFBC (with per-arch encodings) as well as
 * linear and u-interleaved layouts. No-op if no ZS view is bound.
 */
static void
pan_prepare_zs(const struct pan_fb_info *fb, struct MALI_ZS_CRC_EXTENSION *ext)
{
   const struct pan_image_view *zs = fb->zs.view.zs;

   if (!zs)
      return;

   const struct pan_image *image = pan_image_view_get_zs_image(zs);
   unsigned level = zs->first_level;

   ext->zs_msaa = mali_sampling_mode(zs);

   struct pan_surface surf;
   pan_iview_get_surface(zs, 0, 0, 0, &surf);
   /* Only referenced on some arch/modifier combinations */
   UNUSED const struct pan_image_slice_layout *slice =
      &image->layout.slices[level];

   if (drm_is_afbc(image->layout.modifier)) {
#if PAN_ARCH >= 9
      /* v9+ points writeback at the AFBC header and encodes the body as
       * an offset from it */
      ext->zs_writeback_base = surf.afbc.header;
      ext->zs_writeback_row_stride = slice->row_stride;
      /* TODO: surface stride? */
      ext->zs_afbc_body_offset = surf.afbc.body - surf.afbc.header;

      /* TODO: stencil AFBC? */
#else
#if PAN_ARCH >= 6
      /* v6/v7 express the AFBC row stride in block units */
      ext->zs_afbc_row_stride =
         pan_afbc_stride_blocks(image->layout.modifier, slice->row_stride);
#else
      /* v5 legacy AFBC descriptor fields. NOTE(review): the fixed
       * 0x1000 body size and chunk size 9 look like hard-wired legacy
       * values -- confirm against the v5 descriptor definition. */
      ext->zs_block_format = MALI_BLOCK_FORMAT_AFBC;
      ext->zs_afbc_body_size = 0x1000;
      ext->zs_afbc_chunk_size = 9;
      ext->zs_afbc_sparse = true;
#endif

      ext->zs_afbc_header = surf.afbc.header;
      ext->zs_afbc_body = surf.afbc.body;
#endif
   } else {
      assert(image->layout.modifier ==
                DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
             image->layout.modifier == DRM_FORMAT_MOD_LINEAR);

      /* TODO: Z32F(S8) support, which is always linear */

      ext->zs_writeback_base = surf.data;
      ext->zs_writeback_row_stride = image->layout.slices[level].row_stride;
      /* Surface stride only matters for layered MSAA writeback */
      ext->zs_writeback_surface_stride =
         (pan_image_view_get_nr_samples(zs) > 1)
            ? image->layout.slices[level].surface_stride
            : 0;
   }

   ext->zs_block_format = mod_to_block_fmt(image->layout.modifier);
   ext->zs_write_format = translate_zs_format(zs->format);
   /* Packed D24S8 stores stencil in the same plane as depth */
   if (ext->zs_write_format == MALI_ZS_FORMAT_D24S8)
      ext->s_writeback_base = ext->zs_writeback_base;
}
266 
/*
 * Fill the CRC fields of a ZS/CRC extension descriptor for the render
 * target chosen by GENX(pan_select_crc_rt). No-op when rt_crc < 0
 * (CRC disabled for this frame).
 */
static void
pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc,
                struct MALI_ZS_CRC_EXTENSION *ext)
{
   if (rt_crc < 0)
      return;

   assert(rt_crc < fb->rt_count);

   const struct pan_image_view *rt = fb->rts[rt_crc].view;
   const struct pan_image *image = pan_image_view_get_rt_image(rt);
   const struct pan_image_slice_layout *slice =
      &image->layout.slices[rt->first_level];

   /* CRC data lives at a per-slice offset inside the image BO */
   ext->crc_base =
      image->data.base + image->data.offset + slice->crc.offset;
   ext->crc_row_stride = slice->crc.stride;

#if PAN_ARCH >= 7
   /* v7+ supports CRC on any RT; record which one carries it */
   ext->crc_render_target = rt_crc;

   if (fb->rts[rt_crc].clear) {
      uint32_t clear_val = fb->rts[rt_crc].clear_value[0];
      /* NOTE(review): magic encoding -- top two bits set and the low
       * 16 bits of the clear value replicated into bits 32..47.
       * Presumably matches the CRC unit's expected clear-colour layout;
       * confirm against the descriptor definition. */
      ext->crc_clear_color = clear_val | 0xc000000000000000 |
                             (((uint64_t)clear_val & 0xffff) << 32);
   }
#endif
}
295 
/*
 * Pack a complete ZS/CRC extension descriptor: CRC state, depth/stencil
 * writeback and separate-stencil writeback.
 */
static void
pan_emit_zs_crc_ext(const struct pan_fb_info *fb, int rt_crc, void *zs_crc_ext)
{
   pan_pack(zs_crc_ext, ZS_CRC_EXTENSION, cfg) {
      pan_prepare_crc(fb, rt_crc, &cfg);
      /* NOTE(review): presumably clean-pixel writes are required for
       * Z/S clear values to reach memory -- confirm */
      cfg.zs_clean_pixel_write_enable = fb->zs.clear.z || fb->zs.clear.s;
      pan_prepare_zs(fb, &cfg);
      pan_prepare_s(fb, &cfg);
   }
}
306 
307 /* Measure format as it appears in the tile buffer */
308 
309 static unsigned
pan_bytes_per_pixel_tib(enum pipe_format format)310 pan_bytes_per_pixel_tib(enum pipe_format format)
311 {
312    const struct pan_blendable_format *bf =
313      GENX(panfrost_blendable_format_from_pipe_format)(format);
314 
315    if (bf->internal) {
316       /* Blendable formats are always 32-bits in the tile buffer,
317        * extra bits are used as padding or to dither */
318       return 4;
319    } else {
320       /* Non-blendable formats are raw, rounded up to the nearest
321        * power-of-two size */
322       unsigned bytes = util_format_get_blocksize(format);
323       return util_next_power_of_two(bytes);
324    }
325 }
326 
327 static unsigned
pan_cbuf_bytes_per_pixel(const struct pan_fb_info * fb)328 pan_cbuf_bytes_per_pixel(const struct pan_fb_info *fb)
329 {
330    unsigned sum = 0;
331 
332    for (int cb = 0; cb < fb->rt_count; ++cb) {
333       const struct pan_image_view *rt = fb->rts[cb].view;
334 
335       if (!rt)
336          continue;
337 
338       sum += pan_bytes_per_pixel_tib(rt->format) * rt->nr_samples;
339    }
340 
341    return sum;
342 }
343 
/*
 * Select the largest tile size that fits within the tilebuffer budget.
 * Formally, maximize (pixels per tile) such that it is a power of two and
 *
 *      (bytes per pixel) (pixels per tile) <= (max bytes per tile)
 *
 * A bit of algebra gives the following formula.
 */
static unsigned
pan_select_max_tile_size(unsigned tile_buffer_bytes, unsigned bytes_per_pixel)
{
   /* The budget must itself be a power of two for the shift below to
    * produce a power-of-two pixel count */
   assert(util_is_power_of_two_nonzero(tile_buffer_bytes));
   assert(tile_buffer_bytes >= 1024);

   /* Shifting by ceil(log2(bpp)) divides by the next power of two >=
    * bytes_per_pixel, rounding the pixel count down conservatively */
   return tile_buffer_bytes >> util_logbase2_ceil(bytes_per_pixel);
}
360 
/* Map a raw (non-blendable) bit size to the corresponding Mali RAW
 * writeback colour format. Only the exact sizes listed are encodable. */
static enum mali_color_format
pan_mfbd_raw_format(unsigned bits)
{
   /* clang-format off */
   switch (bits) {
   case    8: return MALI_COLOR_FORMAT_RAW8;
   case   16: return MALI_COLOR_FORMAT_RAW16;
   case   24: return MALI_COLOR_FORMAT_RAW24;
   case   32: return MALI_COLOR_FORMAT_RAW32;
   case   48: return MALI_COLOR_FORMAT_RAW48;
   case   64: return MALI_COLOR_FORMAT_RAW64;
   case   96: return MALI_COLOR_FORMAT_RAW96;
   case  128: return MALI_COLOR_FORMAT_RAW128;
   case  192: return MALI_COLOR_FORMAT_RAW192;
   case  256: return MALI_COLOR_FORMAT_RAW256;
   case  384: return MALI_COLOR_FORMAT_RAW384;
   case  512: return MALI_COLOR_FORMAT_RAW512;
   case  768: return MALI_COLOR_FORMAT_RAW768;
   case 1024: return MALI_COLOR_FORMAT_RAW1024;
   case 1536: return MALI_COLOR_FORMAT_RAW1536;
   case 2048: return MALI_COLOR_FORMAT_RAW2048;
   default: unreachable("invalid raw bpp");
   }
   /* clang-format on */
}
386 
/*
 * Initialize the format-related fields of a RENDER_TARGET descriptor:
 * sRGB flag, internal/writeback formats and the render swizzle.
 */
static void
pan_rt_init_format(const struct pan_image_view *rt,
                   struct MALI_RENDER_TARGET *cfg)
{
   /* Explode details on the format */

   const struct util_format_description *desc =
      util_format_description(rt->format);

   /* The swizzle for rendering is inverted from texturing */

   unsigned char swizzle[4] = {
      PIPE_SWIZZLE_X,
      PIPE_SWIZZLE_Y,
      PIPE_SWIZZLE_Z,
      PIPE_SWIZZLE_W,
   };

   /* Fill in accordingly, defaulting to 8-bit UNORM */

   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
      cfg->srgb = true;

   struct pan_blendable_format fmt =
      *GENX(panfrost_blendable_format_from_pipe_format)(rt->format);

   if (fmt.internal) {
      /* Blendable: use the table's internal/writeback pair and invert
       * the texture swizzle for rendering */
      cfg->internal_format = fmt.internal;
      cfg->writeback_format = fmt.writeback;
      panfrost_invert_swizzle(desc->swizzle, swizzle);
   } else {
      /* Construct RAW internal/writeback, where internal is
       * specified logarithmically (round to next power-of-two).
       * Offset specified from RAW8, where 8 = 2^3 */

      unsigned bits = desc->block.bits;
      unsigned offset = util_logbase2_ceil(bits) - 3;
      assert(offset <= 4);

      cfg->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW8 + offset;

      cfg->writeback_format = pan_mfbd_raw_format(bits);
   }

   cfg->swizzle = panfrost_translate_swizzle_4(swizzle);
}
433 
/*
 * Fill a RENDER_TARGET descriptor for RT idx: clear values, tile-buffer
 * offset, format/swizzle and writeback layout (linear, u-interleaved or
 * AFBC depending on the image's modifier and PAN_ARCH).
 */
static void
pan_prepare_rt(const struct pan_fb_info *fb,
               unsigned idx, unsigned cbuf_offset,
               struct MALI_RENDER_TARGET *cfg)
{
   cfg->clean_pixel_write_enable = fb->rts[idx].clear;
   cfg->internal_buffer_offset = cbuf_offset;
   if (fb->rts[idx].clear) {
      cfg->clear.color_0 = fb->rts[idx].clear_value[0];
      cfg->clear.color_1 = fb->rts[idx].clear_value[1];
      cfg->clear.color_2 = fb->rts[idx].clear_value[2];
      cfg->clear.color_3 = fb->rts[idx].clear_value[3];
   }

   const struct pan_image_view *rt = fb->rts[idx].view;
   if (!rt || fb->rts[idx].discard) {
      /* Unbound or discarded RT: keep the slot well-formed but leave
       * write_enable false so nothing is written back */
      cfg->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8;
      cfg->internal_buffer_offset = cbuf_offset;
#if PAN_ARCH >= 7
      cfg->writeback_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
      cfg->dithering_enable = true;
#endif
      return;
   }

   const struct pan_image *image = pan_image_view_get_rt_image(rt);

   cfg->write_enable = true;
   cfg->dithering_enable = true;

   /* Only single-level, single-layer views are handled here */
   unsigned level = rt->first_level;
   assert(rt->last_level == rt->first_level);
   assert(rt->last_layer == rt->first_layer);

   int row_stride = image->layout.slices[level].row_stride;

   /* Only set layer_stride for layered MSAA rendering  */

   unsigned layer_stride = (pan_image_view_get_nr_samples(rt) > 1)
                              ? image->layout.slices[level].surface_stride
                              : 0;

   cfg->writeback_msaa = mali_sampling_mode(rt);

   pan_rt_init_format(rt, cfg);

   cfg->writeback_block_format = mod_to_block_fmt(image->layout.modifier);

   struct pan_surface surf;
   pan_iview_get_surface(rt, 0, 0, 0, &surf);

   if (drm_is_afbc(image->layout.modifier)) {
#if PAN_ARCH >= 9
      if (image->layout.modifier & AFBC_FORMAT_MOD_YTR)
         cfg->afbc.yuv_transform = true;

      cfg->afbc.wide_block = panfrost_afbc_is_wide(image->layout.modifier);
      /* v9+ encodes the AFBC body as an offset from the header */
      cfg->afbc.header = surf.afbc.header;
      cfg->afbc.body_offset = surf.afbc.body - surf.afbc.header;
      assert(surf.afbc.body >= surf.afbc.header);

      cfg->afbc.compression_mode = GENX(pan_afbc_compression_mode)(rt->format);
      cfg->afbc.row_stride = row_stride;
#else
      const struct pan_image_slice_layout *slice = &image->layout.slices[level];

#if PAN_ARCH >= 6
      /* v6/v7 express the AFBC row stride in block units */
      cfg->afbc.row_stride =
         pan_afbc_stride_blocks(image->layout.modifier, slice->row_stride);
      cfg->afbc.afbc_wide_block_enable =
         panfrost_afbc_is_wide(image->layout.modifier);
#else
      /* v5 legacy AFBC descriptor fields */
      cfg->afbc.chunk_size = 9;
      cfg->afbc.sparse = true;
      cfg->afbc.body_size = slice->afbc.body_size;
#endif

      cfg->afbc.header = surf.afbc.header;
      cfg->afbc.body = surf.afbc.body;

      if (image->layout.modifier & AFBC_FORMAT_MOD_YTR)
         cfg->afbc.yuv_transform_enable = true;
#endif
   } else {
      assert(image->layout.modifier == DRM_FORMAT_MOD_LINEAR ||
             image->layout.modifier ==
                DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED);
      cfg->rgb.base = surf.data;
      cfg->rgb.row_stride = row_stride;
      cfg->rgb.surface_stride = layer_stride;
   }
}
526 #endif
527 
/*
 * Pack a LOCAL_STORAGE descriptor: thread-local storage (register spill
 * memory) and workgroup local storage (compute shared memory).
 */
void
GENX(pan_emit_tls)(const struct pan_tls_info *info, void *out)
{
   pan_pack(out, LOCAL_STORAGE, cfg) {
      if (info->tls.size) {
         /* TLS size is encoded as a shift, not in bytes */
         unsigned shift = panfrost_get_stack_shift(info->tls.size);

         cfg.tls_size = shift;
#if PAN_ARCH >= 9
         /* For now, always use packed TLS addressing. This is
          * better for the cache and requires no fix up code in
          * the shader. We may need to revisit this someday for
          * OpenCL generic pointer support.
          */
         cfg.tls_address_mode = MALI_ADDRESS_MODE_PACKED;

         /* Packed mode requires a 4K-aligned base, stored shifted */
         assert((info->tls.ptr & 4095) == 0);
         cfg.tls_base_pointer = info->tls.ptr >> 8;
#else
         cfg.tls_base_pointer = info->tls.ptr;
#endif
      }

      if (info->wls.size) {
         /* WLS must be 4K-aligned and must not straddle a 4GB boundary */
         assert(!(info->wls.ptr & 4095));
         assert((info->wls.ptr & 0xffffffff00000000ULL) ==
                ((info->wls.ptr + info->wls.size - 1) & 0xffffffff00000000ULL));
         cfg.wls_base_pointer = info->wls.ptr;
         unsigned wls_size = pan_wls_adjust_size(info->wls.size);
         cfg.wls_instances = info->wls.instances;
         /* WLS size is encoded logarithmically */
         cfg.wls_size_scale = util_logbase2(wls_size) + 1;
      } else {
         cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
      }
   }
}
564 
565 #if PAN_ARCH <= 5
/*
 * Pack the Midgard TILER_CONTEXT section: pick a hierarchy mask (or a
 * disabled/user mask when tiling is off), size the polygon list, and
 * point the heap at the tiler allocation.
 */
static void
pan_emit_midgard_tiler(const struct pan_fb_info *fb,
                       const struct pan_tiler_context *tiler_ctx, void *out)
{
   bool hierarchy = !tiler_ctx->midgard.no_hierarchical_tiling;

   assert(tiler_ctx->midgard.polygon_list);

   pan_pack(out, TILER_CONTEXT, cfg) {
      unsigned header_size;

      if (tiler_ctx->midgard.disable) {
         /* Tiling disabled: emit a minimal placeholder polygon list with
          * an empty heap */
         cfg.hierarchy_mask =
            hierarchy ? MALI_MIDGARD_TILER_DISABLED : MALI_MIDGARD_TILER_USER;
         header_size = MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE;
         cfg.polygon_list_size = header_size + (hierarchy ? 0 : 4);
         cfg.heap_start = tiler_ctx->midgard.polygon_list;
         cfg.heap_end = tiler_ctx->midgard.polygon_list;
      } else {
         /* Size the hierarchy from the framebuffer dimensions and the
          * expected vertex count */
         cfg.hierarchy_mask = panfrost_choose_hierarchy_mask(
            fb->width, fb->height, tiler_ctx->vertex_count, hierarchy);
         header_size = panfrost_tiler_header_size(
            fb->width, fb->height, cfg.hierarchy_mask, hierarchy);
         cfg.polygon_list_size = panfrost_tiler_full_size(
            fb->width, fb->height, cfg.hierarchy_mask, hierarchy);
         cfg.heap_start = tiler_ctx->midgard.heap.start;
         cfg.heap_end = cfg.heap_start + tiler_ctx->midgard.heap.size;
      }

      cfg.polygon_list = tiler_ctx->midgard.polygon_list;
      /* The list body immediately follows its header */
      cfg.polygon_list_body = cfg.polygon_list + header_size;
   }
}
599 #endif
600 
601 #if PAN_ARCH >= 5
/* Pack one RENDER_TARGET descriptor for RT idx at the given colour-buffer
 * offset. */
static void
pan_emit_rt(const struct pan_fb_info *fb,
            unsigned idx, unsigned cbuf_offset, void *out)
{
   pan_pack(out, RENDER_TARGET, cfg) {
      pan_prepare_rt(fb, idx, cbuf_offset, &cfg);
   }
}
610 
611 #if PAN_ARCH >= 6
612 /* All Bifrost and Valhall GPUs are affected by issue TSIX-2033:
613  *
614  *      Forcing clean_tile_writes breaks INTERSECT readbacks
615  *
616  * To workaround, use the frame shader mode ALWAYS instead of INTERSECT if
617  * clean tile writes is forced. Since INTERSECT is a hint that the hardware may
618  * ignore, this cannot affect correctness, only performance */
619 
620 static enum mali_pre_post_frame_shader_mode
pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode,bool force_clean_tile)621 pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode,
622                           bool force_clean_tile)
623 {
624    if (force_clean_tile && mode == MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT)
625       return MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS;
626    else
627       return mode;
628 }
629 
630 /* Regardless of clean_tile_write_enable, the hardware writes clean tiles if
631  * the effective tile size differs from the superblock size of any enabled AFBC
632  * render target. Check this condition. */
633 
634 static bool
pan_force_clean_write_rt(const struct pan_image_view * rt,unsigned tile_size)635 pan_force_clean_write_rt(const struct pan_image_view *rt, unsigned tile_size)
636 {
637    const struct pan_image *image = pan_image_view_get_rt_image(rt);
638    if (!drm_is_afbc(image->layout.modifier))
639       return false;
640 
641    unsigned superblock = panfrost_afbc_superblock_width(image->layout.modifier);
642 
643    assert(superblock >= 16);
644    assert(tile_size <= 16 * 16);
645 
646    /* Tile size and superblock differ unless they are both 16x16 */
647    return !(superblock == 16 && tile_size == 16 * 16);
648 }
649 
650 static bool
pan_force_clean_write(const struct pan_fb_info * fb,unsigned tile_size)651 pan_force_clean_write(const struct pan_fb_info *fb, unsigned tile_size)
652 {
653    /* Maximum tile size */
654    assert(tile_size <= 16 * 16);
655 
656    for (unsigned i = 0; i < fb->rt_count; ++i) {
657       if (fb->rts[i].view && !fb->rts[i].discard &&
658           pan_force_clean_write_rt(fb->rts[i].view, tile_size))
659          return true;
660    }
661 
662    if (fb->zs.view.zs && !fb->zs.discard.z &&
663        pan_force_clean_write_rt(fb->zs.view.zs, tile_size))
664       return true;
665 
666    if (fb->zs.view.s && !fb->zs.discard.s &&
667        pan_force_clean_write_rt(fb->zs.view.s, tile_size))
668       return true;
669 
670    return false;
671 }
672 
673 #endif
674 
/*
 * Emit a complete multi-target framebuffer descriptor: the FRAMEBUFFER
 * parameters, an optional ZS/CRC extension, and one RENDER_TARGET per RT.
 * Returns the FRAMEBUFFER_POINTER tag word that encodes the descriptor
 * layout for the job chain.
 */
unsigned
GENX(pan_emit_fbd)(const struct pan_fb_info *fb, const struct pan_tls_info *tls,
                   const struct pan_tiler_context *tiler_ctx, void *out)
{
   void *fbd = out;
   /* Render targets follow the framebuffer descriptor (and ZS/CRC
    * extension, if present -- see below) */
   void *rtd = out + pan_size(FRAMEBUFFER);

#if PAN_ARCH <= 5
   /* Pre-v6, local storage state is embedded in the FBD itself */
   GENX(pan_emit_tls)(tls, pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE));
#endif

   unsigned bytes_per_pixel = pan_cbuf_bytes_per_pixel(fb);
   unsigned tile_size =
      pan_select_max_tile_size(fb->tile_buf_budget, bytes_per_pixel);

   /* Clamp tile size to hardware limits */
   tile_size = MIN2(tile_size, 16 * 16);
   assert(tile_size >= 4 * 4);

   /* Colour buffer allocations must be 1K aligned. */
   unsigned cbuf_allocation = ALIGN_POT(bytes_per_pixel * tile_size, 1024);
   assert(cbuf_allocation <= fb->tile_buf_budget && "tile too big");

   int crc_rt = GENX(pan_select_crc_rt)(fb, tile_size);
   bool has_zs_crc_ext = (fb->zs.view.zs || fb->zs.view.s || crc_rt >= 0);

   pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
#if PAN_ARCH >= 6
      bool force_clean_write = pan_force_clean_write(fb, tile_size);

      cfg.sample_locations = fb->sample_positions;
      /* Apply the TSIX-2033 workaround to each frame-shader mode */
      cfg.pre_frame_0 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[0],
                                                  force_clean_write);
      cfg.pre_frame_1 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[1],
                                                  force_clean_write);
      cfg.post_frame = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[2],
                                                 force_clean_write);
      cfg.frame_shader_dcds = fb->bifrost.pre_post.dcds.gpu;
      cfg.tiler = tiler_ctx->bifrost;
#endif
      cfg.width = fb->width;
      cfg.height = fb->height;
      cfg.bound_max_x = fb->width - 1;
      cfg.bound_max_y = fb->height - 1;

      cfg.effective_tile_size = tile_size;
      cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
      /* At least one RT descriptor is always emitted, even with no RTs */
      cfg.render_target_count = MAX2(fb->rt_count, 1);

      /* Default to 24 bit depth if there's no surface. */
      cfg.z_internal_format =
         fb->zs.view.zs ? panfrost_get_z_internal_format(fb->zs.view.zs->format)
                        : MALI_Z_INTERNAL_FORMAT_D24;

      cfg.z_clear = fb->zs.clear_value.depth;
      cfg.s_clear = fb->zs.clear_value.stencil;
      cfg.color_buffer_allocation = cbuf_allocation;

      /* The force_samples setting dictates the sample-count that is used
       * for rasterization, and works like D3D11's ForcedSampleCount feature:
       *
       * - If force_samples == 0: Let nr_samples dictate sample count
       * - If force_samples == 1: force single-sampled rasterization
       * - If force_samples >= 1: force multi-sampled rasterization
       *
       * This can be used to read SYSTEM_VALUE_SAMPLE_MASK_IN from the
       * fragment shader, even when performing single-sampled rendering.
       */
      if (!fb->force_samples) {
         cfg.sample_count = fb->nr_samples;
         cfg.sample_pattern = pan_sample_pattern(fb->nr_samples);
      } else if (fb->force_samples == 1) {
         cfg.sample_count = fb->nr_samples;
         cfg.sample_pattern = pan_sample_pattern(1);
      } else {
         cfg.sample_count = 1;
         cfg.sample_pattern = pan_sample_pattern(fb->force_samples);
      }

      cfg.z_write_enable = (fb->zs.view.zs && !fb->zs.discard.z);
      cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s);
      cfg.has_zs_crc_extension = has_zs_crc_ext;

      if (crc_rt >= 0) {
         bool *valid = fb->rts[crc_rt].crc_valid;
         bool full = !fb->extent.minx && !fb->extent.miny &&
                     fb->extent.maxx == (fb->width - 1) &&
                     fb->extent.maxy == (fb->height - 1);

         cfg.crc_read_enable = *valid;

         /* If the data is currently invalid, still write CRC
          * data if we are doing a full write, so that it is
          * valid for next time. */
         cfg.crc_write_enable = *valid || full;

         *valid |= full;
      }

#if PAN_ARCH >= 9
      cfg.point_sprite_coord_origin_max_y = fb->sprite_coord_origin;
      cfg.first_provoking_vertex = fb->first_provoking_vertex;
#endif
   }

#if PAN_ARCH >= 6
   pan_section_pack(fbd, FRAMEBUFFER, PADDING, padding)
      ;
#else
   pan_emit_midgard_tiler(fb, tiler_ctx,
                          pan_section_ptr(fbd, FRAMEBUFFER, TILER));

   /* All weights set to 0, nothing to do here */
   pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w)
      ;
#endif

   if (has_zs_crc_ext) {
      pan_emit_zs_crc_ext(fb, crc_rt, out + pan_size(FRAMEBUFFER));
      rtd += pan_size(ZS_CRC_EXTENSION);
   }

   unsigned rt_count = MAX2(fb->rt_count, 1);
   unsigned cbuf_offset = 0;
   for (unsigned i = 0; i < rt_count; i++) {
      pan_emit_rt(fb, i, cbuf_offset, rtd);
      rtd += pan_size(RENDER_TARGET);
      if (!fb->rts[i].view)
         continue;

      /* Each bound RT consumes its own slice of the tile buffer */
      cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) *
                     tile_size * pan_image_view_get_nr_samples(fb->rts[i].view);

      /* CRC data is only maintained for the selected RT; invalidate the
       * rest */
      if (i != crc_rt)
         *(fb->rts[i].crc_valid) = false;
   }

   /* Tag word describing the descriptor layout, returned to the caller */
   struct mali_framebuffer_pointer_packed tag;
   pan_pack(tag.opaque, FRAMEBUFFER_POINTER, cfg) {
      cfg.zs_crc_extension_present = has_zs_crc_ext;
      cfg.render_target_count = MAX2(fb->rt_count, 1);
   }
   return tag.opaque[0];
}
819 #else /* PAN_ARCH == 4 */
/*
 * Emit a single-target framebuffer descriptor (SFBD) for PAN_ARCH == 4
 * hardware, which supports at most one color render target (hence the
 * assert below).
 *
 * @fb        framebuffer state: dimensions, clear values, RT/ZS views
 * @tls       thread-local storage configuration, emitted into the
 *            LOCAL_STORAGE section
 * @tiler_ctx tiler context, used to fill the TILER section
 * @fbd       CPU pointer to the descriptor memory being filled
 *
 * Returns 0: this path produces no tag bits for the framebuffer pointer
 * (the v5+ path above returns a FRAMEBUFFER_POINTER tag instead).
 */
unsigned
GENX(pan_emit_fbd)(const struct pan_fb_info *fb, const struct pan_tls_info *tls,
                   const struct pan_tiler_context *tiler_ctx, void *fbd)
{
   /* SFBD hardware has a single color render target. */
   assert(fb->rt_count <= 1);

   GENX(pan_emit_tls)(tls, pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE));
   pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
      /* Bounds are inclusive, hence the -1. */
      cfg.bound_max_x = fb->width - 1;
      cfg.bound_max_y = fb->height - 1;
      cfg.dithering_enable = true;
      cfg.clean_pixel_write_enable = true;
      cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
      if (fb->rts[0].clear) {
         cfg.clear_color_0 = fb->rts[0].clear_value[0];
         cfg.clear_color_1 = fb->rts[0].clear_value[1];
         cfg.clear_color_2 = fb->rts[0].clear_value[2];
         cfg.clear_color_3 = fb->rts[0].clear_value[3];
      }

      if (fb->zs.clear.z)
         cfg.z_clear = fb->zs.clear_value.depth;

      if (fb->zs.clear.s)
         cfg.s_clear = fb->zs.clear_value.stencil;

      /* Color writeback setup, only when a color RT view is bound. */
      if (fb->rt_count && fb->rts[0].view) {
         const struct pan_image_view *rt = fb->rts[0].view;
         const struct pan_image *image = pan_image_view_get_rt_image(rt);

         const struct util_format_description *desc =
            util_format_description(rt->format);

         /* The swizzle for rendering is inverted from texturing */
         unsigned char swizzle[4];
         panfrost_invert_swizzle(desc->swizzle, swizzle);
         cfg.swizzle = panfrost_translate_swizzle_4(swizzle);

         struct pan_blendable_format fmt =
            *GENX(panfrost_blendable_format_from_pipe_format)(rt->format);

         if (fmt.internal) {
            cfg.internal_format = fmt.internal;
            cfg.color_writeback_format = fmt.writeback;
         } else {
            /* Non-blendable (raw) color formats are not wired up on
             * this descriptor layout. */
            unreachable("raw formats not finished for SFBD");
         }

         unsigned level = rt->first_level;
         struct pan_surface surf;

         pan_iview_get_surface(rt, 0, 0, 0, &surf);

         cfg.color_write_enable = !fb->rts[0].discard;
         cfg.color_writeback.base = surf.data;
         cfg.color_writeback.row_stride =
            image->layout.slices[level].row_stride;

         /* SFBD only accepts linear or u-interleaved tiled color
          * buffers; AFBC is not representable here. */
         cfg.color_block_format = mod_to_block_fmt(image->layout.modifier);
         assert(cfg.color_block_format == MALI_BLOCK_FORMAT_LINEAR ||
                cfg.color_block_format ==
                   MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED);

         /* Point the hardware at the per-slice CRC buffer when the
          * image carries one. */
         if (pan_image_view_has_crc(rt)) {
            const struct pan_image_slice_layout *slice =
               &image->layout.slices[level];

            cfg.crc_buffer.row_stride = slice->crc.stride;
            cfg.crc_buffer.base =
               image->data.base + image->data.offset + slice->crc.offset;
         }
      }

      /* Depth/stencil writeback setup, mirroring the color path. */
      if (fb->zs.view.zs) {
         const struct pan_image_view *zs = fb->zs.view.zs;
         const struct pan_image *image = pan_image_view_get_zs_image(zs);
         unsigned level = zs->first_level;
         struct pan_surface surf;

         pan_iview_get_surface(zs, 0, 0, 0, &surf);

         cfg.zs_write_enable = !fb->zs.discard.z;
         cfg.zs_writeback.base = surf.data;
         cfg.zs_writeback.row_stride = image->layout.slices[level].row_stride;
         cfg.zs_block_format = mod_to_block_fmt(image->layout.modifier);
         assert(cfg.zs_block_format == MALI_BLOCK_FORMAT_LINEAR ||
                cfg.zs_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED);

         cfg.zs_format = translate_zs_format(zs->format);
      }

      cfg.sample_count = fb->nr_samples;

      if (fb->rt_count)
         cfg.msaa = mali_sampling_mode(fb->rts[0].view);
   }

   pan_emit_midgard_tiler(fb, tiler_ctx,
                          pan_section_ptr(fbd, FRAMEBUFFER, TILER));

   /* All weights set to 0, nothing to do here */
   pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w)
      ;

   /* Padding sections still need packing so the descriptor is fully
    * initialized. */
   pan_section_pack(fbd, FRAMEBUFFER, PADDING_1, padding)
      ;
   pan_section_pack(fbd, FRAMEBUFFER, PADDING_2, padding)
      ;
   return 0;
}
930 #endif
931 
932 #if PAN_ARCH <= 9
933 void
GENX(pan_emit_fragment_job)934 GENX(pan_emit_fragment_job)(const struct pan_fb_info *fb, mali_ptr fbd,
935                             void *out)
936 {
937    pan_section_pack(out, FRAGMENT_JOB, HEADER, header) {
938       header.type = MALI_JOB_TYPE_FRAGMENT;
939       header.index = 1;
940    }
941 
942    pan_section_pack(out, FRAGMENT_JOB, PAYLOAD, payload) {
943       payload.bound_min_x = fb->extent.minx >> MALI_TILE_SHIFT;
944       payload.bound_min_y = fb->extent.miny >> MALI_TILE_SHIFT;
945       payload.bound_max_x = fb->extent.maxx >> MALI_TILE_SHIFT;
946       payload.bound_max_y = fb->extent.maxy >> MALI_TILE_SHIFT;
947       payload.framebuffer = fbd;
948 
949 #if PAN_ARCH >= 5
950       if (fb->tile_map.base) {
951          payload.has_tile_enable_map = true;
952          payload.tile_enable_map = fb->tile_map.base;
953          payload.tile_enable_map_row_stride = fb->tile_map.stride;
954       }
955 #endif
956    }
957 }
958 #endif
959