/*
 * Copyright (C) 2021 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 *   Boris Brezillon <boris.brezillon@collabora.com>
 */

#include "util/macros.h"

#include "pan_cs.h"
#include "pan_encoder.h"
#include "pan_texture.h"

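/* Translate a DRM format modifier into the hardware block format enum used by
 * framebuffer and render target descriptors. */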
static unsigned
mod_to_block_fmt(uint64_t mod)
{
        switch (mod) {
        case DRM_FORMAT_MOD_LINEAR:
                return MALI_BLOCK_FORMAT_LINEAR;
        case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED:
                return MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
        default:
#if PAN_ARCH >= 5
                if (drm_is_afbc(mod) && !(mod & AFBC_FORMAT_MOD_TILED))
                        return MALI_BLOCK_FORMAT_AFBC;
#endif
#if PAN_ARCH >= 7
                if (drm_is_afbc(mod) && (mod & AFBC_FORMAT_MOD_TILED))
                        return MALI_BLOCK_FORMAT_AFBC_TILED;
#endif

                unreachable("Unsupported modifier");
        }
}

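/* Pick the MSAA writeback mode for an image view: layered when the underlying
 * image is truly multisampled, average (resolve on writeback) when the view
 * requests more samples than its single-sampled image provides, and single
 * otherwise. */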
static enum mali_msaa
mali_sampling_mode(const struct pan_image_view *view)
{
        if (view->image->layout.nr_samples > 1) {
                assert(view->nr_samples == view->image->layout.nr_samples);
                assert(view->image->layout.slices[0].surface_stride != 0);
                return MALI_MSAA_LAYERED;
        }

        if (view->nr_samples > view->image->layout.nr_samples) {
                assert(view->image->layout.nr_samples == 1);
                return MALI_MSAA_AVERAGE;
        }

        assert(view->nr_samples == view->image->layout.nr_samples);
        assert(view->nr_samples == 1);

        return MALI_MSAA_SINGLE;
}

static inline enum mali_sample_pattern
pan_sample_pattern(unsigned samples)
{
        switch (samples) {
        case 1:  return MALI_SAMPLE_PATTERN_SINGLE_SAMPLED;
        case 4:  return MALI_SAMPLE_PATTERN_ROTATED_4X_GRID;
        case 8:  return MALI_SAMPLE_PATTERN_D3D_8X_GRID;
        case 16: return MALI_SAMPLE_PATTERN_D3D_16X_GRID;
        default: unreachable("Unsupported sample count");
        }
}

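/* Select the render target to use for CRC (transaction elimination) this
 * frame, returning its index, or -1 to disable CRC entirely. */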
int
GENX(pan_select_crc_rt)(const struct pan_fb_info *fb, unsigned tile_size)
{
        /* Disable CRC when the tile size is not 16x16. In the hardware, CRC
         * tiles are the same size as the tiles of the framebuffer. However,
         * our code only handles 16x16 tiles. Therefore under the current
         * implementation, we must disable CRC when 16x16 tiles are not used.
         *
         * This may hurt performance. However, smaller tile sizes are rare, and
         * CRCs are more expensive at smaller tile sizes, reducing the benefit.
         * Restricting CRC to 16x16 should work in practice.
         */
        if (tile_size != 16 * 16) {
                assert(tile_size < 16 * 16);
                return -1;
        }

#if PAN_ARCH <= 6
        if (fb->rt_count == 1 && fb->rts[0].view && !fb->rts[0].discard &&
            fb->rts[0].view->image->layout.crc_mode != PAN_IMAGE_CRC_NONE)
                return 0;

        return -1;
#else
        bool best_rt_valid = false;
        int best_rt = -1;

        for (unsigned i = 0; i < fb->rt_count; i++) {
                if (!fb->rts[i].view || fb->rts[i].discard ||
                    fb->rts[i].view->image->layout.crc_mode == PAN_IMAGE_CRC_NONE)
                        continue;

                bool valid = *(fb->rts[i].crc_valid);
                bool full = !fb->extent.minx && !fb->extent.miny &&
                            fb->extent.maxx == (fb->width - 1) &&
                            fb->extent.maxy == (fb->height - 1);
                if (!full && !valid)
                        continue;

                if (best_rt < 0 || (valid && !best_rt_valid)) {
                        best_rt = i;
                        best_rt_valid = valid;
                }

                if (valid)
                        break;
        }

        return best_rt;
#endif
}

static enum mali_zs_format
translate_zs_format(enum pipe_format in)
{
        switch (in) {
        case PIPE_FORMAT_Z16_UNORM: return MALI_ZS_FORMAT_D16;
        case PIPE_FORMAT_Z24_UNORM_S8_UINT: return MALI_ZS_FORMAT_D24S8;
        case PIPE_FORMAT_Z24X8_UNORM: return MALI_ZS_FORMAT_D24X8;
        case PIPE_FORMAT_Z32_FLOAT: return MALI_ZS_FORMAT_D32;
#if PAN_ARCH <= 7
        case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: return MALI_ZS_FORMAT_D32_S8X24;
#endif
        default: unreachable("Unsupported depth/stencil format.");
        }
}

#if PAN_ARCH >= 5
static enum mali_s_format
translate_s_format(enum pipe_format in)
{
        switch (in) {
        case PIPE_FORMAT_S8_UINT: return MALI_S_FORMAT_S8;
        case PIPE_FORMAT_Z24_UNORM_S8_UINT:
        case PIPE_FORMAT_X24S8_UINT:
                return MALI_S_FORMAT_X24S8;

#if PAN_ARCH <= 7
        case PIPE_FORMAT_S8_UINT_Z24_UNORM:
        case PIPE_FORMAT_S8X24_UINT:
                return MALI_S_FORMAT_S8X24;
        case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
                return MALI_S_FORMAT_X32_S8X24;
#endif

        default:
                unreachable("Unsupported stencil format.");
        }
}

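/* Fill the stencil writeback fields of a ZS_CRC_EXTENSION descriptor from the
 * framebuffer's stencil view, if any. */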
static void
pan_prepare_s(const struct pan_fb_info *fb,
              struct MALI_ZS_CRC_EXTENSION *ext)
{
        const struct pan_image_view *s = fb->zs.view.s;

        if (!s)
                return;

        unsigned level = s->first_level;

        ext->s_msaa = mali_sampling_mode(s);

        struct pan_surface surf;
        pan_iview_get_surface(s, 0, 0, 0, &surf);

        assert(s->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
               s->image->layout.modifier == DRM_FORMAT_MOD_LINEAR);
        ext->s_writeback_base = surf.data;
        ext->s_writeback_row_stride = s->image->layout.slices[level].row_stride;
        ext->s_writeback_surface_stride =
                (s->image->layout.nr_samples > 1) ?
                s->image->layout.slices[level].surface_stride : 0;
        ext->s_block_format = mod_to_block_fmt(s->image->layout.modifier);
        ext->s_write_format = translate_s_format(s->format);
}

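/* Fill the depth/stencil writeback fields of a ZS_CRC_EXTENSION descriptor,
 * handling both AFBC and linear/u-interleaved layouts. */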
static void
pan_prepare_zs(const struct pan_fb_info *fb,
               struct MALI_ZS_CRC_EXTENSION *ext)
{
        const struct pan_image_view *zs = fb->zs.view.zs;

        if (!zs)
                return;

        unsigned level = zs->first_level;

        ext->zs_msaa = mali_sampling_mode(zs);

        struct pan_surface surf;
        pan_iview_get_surface(zs, 0, 0, 0, &surf);
        UNUSED const struct pan_image_slice_layout *slice = &zs->image->layout.slices[level];

        if (drm_is_afbc(zs->image->layout.modifier)) {
#if PAN_ARCH >= 9
                ext->zs_writeback_base = surf.afbc.header;
                ext->zs_writeback_row_stride = slice->row_stride;
                /* TODO: surface stride? */
                ext->zs_afbc_body_offset = surf.afbc.body - surf.afbc.header;

                /* TODO: stencil AFBC? */
#else
#if PAN_ARCH >= 6
                ext->zs_afbc_row_stride = pan_afbc_stride_blocks(zs->image->layout.modifier, slice->row_stride);
#else
                ext->zs_block_format = MALI_BLOCK_FORMAT_AFBC;
                ext->zs_afbc_body_size = 0x1000;
                ext->zs_afbc_chunk_size = 9;
                ext->zs_afbc_sparse = true;
#endif

                ext->zs_afbc_header = surf.afbc.header;
                ext->zs_afbc_body = surf.afbc.body;
#endif
        } else {
                assert(zs->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
                       zs->image->layout.modifier == DRM_FORMAT_MOD_LINEAR);

                /* TODO: Z32F(S8) support, which is always linear */

                ext->zs_writeback_base = surf.data;
                ext->zs_writeback_row_stride =
                        zs->image->layout.slices[level].row_stride;
                ext->zs_writeback_surface_stride =
                        (zs->image->layout.nr_samples > 1) ?
                        zs->image->layout.slices[level].surface_stride : 0;
        }

        ext->zs_block_format = mod_to_block_fmt(zs->image->layout.modifier);
        ext->zs_write_format = translate_zs_format(zs->format);
        if (ext->zs_write_format == MALI_ZS_FORMAT_D24S8)
                ext->s_writeback_base = ext->zs_writeback_base;
}

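/* Point the ZS_CRC_EXTENSION at the CRC buffer of the selected render target,
 * whether it lives in-band with the image data or in a separate BO. */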
static void
pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc,
                struct MALI_ZS_CRC_EXTENSION *ext)
{
        if (rt_crc < 0)
                return;

        assert(rt_crc < fb->rt_count);

        const struct pan_image_view *rt = fb->rts[rt_crc].view;
        const struct pan_image_slice_layout *slice = &rt->image->layout.slices[rt->first_level];
        ext->crc_base = (rt->image->layout.crc_mode == PAN_IMAGE_CRC_INBAND ?
                         (rt->image->data.bo->ptr.gpu + rt->image->data.offset) :
                         (rt->image->crc.bo->ptr.gpu + rt->image->crc.offset)) +
                        slice->crc.offset;
        ext->crc_row_stride = slice->crc.stride;

#if PAN_ARCH >= 7
        ext->crc_render_target = rt_crc;

        if (fb->rts[rt_crc].clear) {
                uint32_t clear_val = fb->rts[rt_crc].clear_value[0];
                ext->crc_clear_color = clear_val | 0xc000000000000000 |
                                       (((uint64_t)clear_val & 0xffff) << 32);
        }
#endif
}

static void
pan_emit_zs_crc_ext(const struct pan_fb_info *fb, int rt_crc,
                    void *zs_crc_ext)
{
        pan_pack(zs_crc_ext, ZS_CRC_EXTENSION, cfg) {
                pan_prepare_crc(fb, rt_crc, &cfg);
                cfg.zs_clean_pixel_write_enable = fb->zs.clear.z || fb->zs.clear.s;
                pan_prepare_zs(fb, &cfg);
                pan_prepare_s(fb, &cfg);
        }
}

/* Measure format as it appears in the tile buffer */

static unsigned
pan_bytes_per_pixel_tib(enum pipe_format format)
{
        if (panfrost_blendable_formats_v7[format].internal) {
                /* Blendable formats are always 32-bits in the tile buffer,
                 * extra bits are used as padding or to dither */
                return 4;
        } else {
                /* Non-blendable formats are raw, rounded up to the nearest
                 * power-of-two size */
                unsigned bytes = util_format_get_blocksize(format);
                return util_next_power_of_two(bytes);
        }
}

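/* Sum the tile buffer footprint of every bound colour buffer, in bytes per
 * pixel. Each sample of a multisampled render target occupies its own slot in
 * the tile buffer, hence the multiply by nr_samples. */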
static unsigned
pan_cbuf_bytes_per_pixel(const struct pan_fb_info *fb)
{
        unsigned sum = 0;

        for (int cb = 0; cb < fb->rt_count; ++cb) {
                const struct pan_image_view *rt = fb->rts[cb].view;

                if (!rt)
                        continue;

                sum += pan_bytes_per_pixel_tib(rt->format) * rt->nr_samples;
        }

        return sum;
}

/*
 * Select the largest tile size that fits within the tilebuffer budget.
 * Formally, maximize (pixels per tile) such that it is a power of two and
 *
 *      (bytes per pixel) (pixels per tile) <= (max bytes per tile)
 *
 * A bit of algebra gives the following formula.
 */
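/*
 * Worked example (illustrative numbers): with a 16384-byte tile buffer and 48
 * bytes of colour per pixel, bytes_per_pixel rounds up to the next power of
 * two (64), giving 16384 >> log2(64) = 256 pixels per tile, i.e. an effective
 * 16x16 tile.
 */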
static unsigned
pan_select_max_tile_size(unsigned tile_buffer_bytes, unsigned bytes_per_pixel)
{
        assert(util_is_power_of_two_nonzero(tile_buffer_bytes));
        assert(tile_buffer_bytes >= 1024);

        return tile_buffer_bytes >> util_logbase2_ceil(bytes_per_pixel);
}

static enum mali_color_format
pan_mfbd_raw_format(unsigned bits)
{
        switch (bits) {
        case    8: return MALI_COLOR_FORMAT_RAW8;
        case   16: return MALI_COLOR_FORMAT_RAW16;
        case   24: return MALI_COLOR_FORMAT_RAW24;
        case   32: return MALI_COLOR_FORMAT_RAW32;
        case   48: return MALI_COLOR_FORMAT_RAW48;
        case   64: return MALI_COLOR_FORMAT_RAW64;
        case   96: return MALI_COLOR_FORMAT_RAW96;
        case  128: return MALI_COLOR_FORMAT_RAW128;
        case  192: return MALI_COLOR_FORMAT_RAW192;
        case  256: return MALI_COLOR_FORMAT_RAW256;
        case  384: return MALI_COLOR_FORMAT_RAW384;
        case  512: return MALI_COLOR_FORMAT_RAW512;
        case  768: return MALI_COLOR_FORMAT_RAW768;
        case 1024: return MALI_COLOR_FORMAT_RAW1024;
        case 1536: return MALI_COLOR_FORMAT_RAW1536;
        case 2048: return MALI_COLOR_FORMAT_RAW2048;
        default: unreachable("invalid raw bpp");
        }
}

static void
pan_rt_init_format(const struct pan_image_view *rt,
                   struct MALI_RENDER_TARGET *cfg)
{
        /* Explode details on the format */

        const struct util_format_description *desc =
                util_format_description(rt->format);

        /* The swizzle for rendering is inverted from texturing */

        unsigned char swizzle[4] = {
                PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
        };

        /* Fill in accordingly, defaulting to 8-bit UNORM */

        if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
                cfg->srgb = true;

        struct pan_blendable_format fmt = panfrost_blendable_formats_v7[rt->format];

        if (fmt.internal) {
                cfg->internal_format = fmt.internal;
                cfg->writeback_format = fmt.writeback;
                panfrost_invert_swizzle(desc->swizzle, swizzle);
        } else {
                /* Construct RAW internal/writeback, where internal is
                 * specified logarithmically (round to next power-of-two).
                 * Offset specified from RAW8, where 8 = 2^3 */

                unsigned bits = desc->block.bits;
                unsigned offset = util_logbase2_ceil(bits) - 3;
                assert(offset <= 4);

                cfg->internal_format =
                        MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW8 + offset;

                cfg->writeback_format = pan_mfbd_raw_format(bits);
        }

        cfg->swizzle = panfrost_translate_swizzle_4(swizzle);
}

#if PAN_ARCH >= 9
enum mali_afbc_compression_mode
pan_afbc_compression_mode(enum pipe_format format)
{
        /* There's a special case for texturing the stencil part from a combined
         * depth/stencil texture, handle it separately.
         */
        if (format == PIPE_FORMAT_X24S8_UINT)
                return MALI_AFBC_COMPRESSION_MODE_X24S8;

        /* Otherwise, map canonical formats to the hardware enum. This only
         * needs to handle the subset of formats returned by
         * panfrost_afbc_format.
         */
        switch (panfrost_afbc_format(PAN_ARCH, format)) {
        case PIPE_FORMAT_R8G8_UNORM: return MALI_AFBC_COMPRESSION_MODE_R8G8;
        case PIPE_FORMAT_R8G8B8_UNORM: return MALI_AFBC_COMPRESSION_MODE_R8G8B8;
        case PIPE_FORMAT_R8G8B8A8_UNORM: return MALI_AFBC_COMPRESSION_MODE_R8G8B8A8;
        case PIPE_FORMAT_R5G6B5_UNORM: return MALI_AFBC_COMPRESSION_MODE_R5G6B5;
        case PIPE_FORMAT_S8_UINT: return MALI_AFBC_COMPRESSION_MODE_S8;
        case PIPE_FORMAT_NONE: unreachable("invalid format for AFBC");
        default: unreachable("unknown canonical AFBC format");
        }
}
#endif

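/* Fill a RENDER_TARGET descriptor for render target idx. Unbound or discarded
 * targets still get an internal format and tile buffer offset so the hardware
 * has a consistent view of the colour buffer layout. */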
static void
pan_prepare_rt(const struct pan_fb_info *fb, unsigned idx,
               unsigned cbuf_offset,
               struct MALI_RENDER_TARGET *cfg)
{
        cfg->clean_pixel_write_enable = fb->rts[idx].clear;
        cfg->internal_buffer_offset = cbuf_offset;
        if (fb->rts[idx].clear) {
                cfg->clear.color_0 = fb->rts[idx].clear_value[0];
                cfg->clear.color_1 = fb->rts[idx].clear_value[1];
                cfg->clear.color_2 = fb->rts[idx].clear_value[2];
                cfg->clear.color_3 = fb->rts[idx].clear_value[3];
        }

        const struct pan_image_view *rt = fb->rts[idx].view;
        if (!rt || fb->rts[idx].discard) {
                cfg->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8;
                cfg->internal_buffer_offset = cbuf_offset;
#if PAN_ARCH >= 7
                cfg->writeback_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
                cfg->dithering_enable = true;
#endif
                return;
        }

        cfg->write_enable = true;
        cfg->dithering_enable = true;

        unsigned level = rt->first_level;
        assert(rt->last_level == rt->first_level);
        assert(rt->last_layer == rt->first_layer);

        int row_stride = rt->image->layout.slices[level].row_stride;

        /* Only set layer_stride for layered MSAA rendering */

        unsigned layer_stride =
                (rt->image->layout.nr_samples > 1) ?
                        rt->image->layout.slices[level].surface_stride : 0;

        cfg->writeback_msaa = mali_sampling_mode(rt);

        pan_rt_init_format(rt, cfg);

        cfg->writeback_block_format = mod_to_block_fmt(rt->image->layout.modifier);

        struct pan_surface surf;
        pan_iview_get_surface(rt, 0, 0, 0, &surf);

        if (drm_is_afbc(rt->image->layout.modifier)) {
#if PAN_ARCH >= 9
                if (rt->image->layout.modifier & AFBC_FORMAT_MOD_YTR)
                        cfg->afbc.yuv_transform = true;

                cfg->afbc.wide_block = panfrost_afbc_is_wide(rt->image->layout.modifier);
                cfg->afbc.header = surf.afbc.header;
                cfg->afbc.body_offset = surf.afbc.body - surf.afbc.header;
                assert(surf.afbc.body >= surf.afbc.header);

                cfg->afbc.compression_mode = pan_afbc_compression_mode(rt->format);
                cfg->afbc.row_stride = row_stride;
#else
                const struct pan_image_slice_layout *slice = &rt->image->layout.slices[level];

#if PAN_ARCH >= 6
                cfg->afbc.row_stride = pan_afbc_stride_blocks(rt->image->layout.modifier, slice->row_stride);
                cfg->afbc.afbc_wide_block_enable =
                        panfrost_afbc_is_wide(rt->image->layout.modifier);
#else
                cfg->afbc.chunk_size = 9;
                cfg->afbc.sparse = true;
                cfg->afbc.body_size = slice->afbc.body_size;
#endif

                cfg->afbc.header = surf.afbc.header;
                cfg->afbc.body = surf.afbc.body;

                if (rt->image->layout.modifier & AFBC_FORMAT_MOD_YTR)
                        cfg->afbc.yuv_transform_enable = true;
#endif
        } else {
                assert(rt->image->layout.modifier == DRM_FORMAT_MOD_LINEAR ||
                       rt->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED);
                cfg->rgb.base = surf.data;
                cfg->rgb.row_stride = row_stride;
                cfg->rgb.surface_stride = layer_stride;
        }
}
#endif

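/* Emit a LOCAL_STORAGE descriptor describing thread-local storage (used for
 * register spilling) and workgroup-local storage for compute shaders. */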
void
GENX(pan_emit_tls)(const struct pan_tls_info *info,
                   void *out)
{
        pan_pack(out, LOCAL_STORAGE, cfg) {
                if (info->tls.size) {
                        unsigned shift =
                                panfrost_get_stack_shift(info->tls.size);

                        cfg.tls_size = shift;
#if PAN_ARCH >= 9
                        /* For now, always use packed TLS addressing. This is
                         * better for the cache and requires no fix up code in
                         * the shader. We may need to revisit this someday for
                         * OpenCL generic pointer support.
                         */
                        cfg.tls_address_mode = MALI_ADDRESS_MODE_PACKED;

                        assert((info->tls.ptr & 4095) == 0);
                        cfg.tls_base_pointer = info->tls.ptr >> 8;
#else
                        cfg.tls_base_pointer = info->tls.ptr;
#endif
                }

                if (info->wls.size) {
                        assert(!(info->wls.ptr & 4095));
                        assert((info->wls.ptr & 0xffffffff00000000ULL) == ((info->wls.ptr + info->wls.size - 1) & 0xffffffff00000000ULL));
                        cfg.wls_base_pointer = info->wls.ptr;
                        unsigned wls_size = pan_wls_adjust_size(info->wls.size);
                        cfg.wls_instances = pan_wls_instances(&info->wls.dim);
                        cfg.wls_size_scale = util_logbase2(wls_size) + 1;
                } else {
                        cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
                }
        }
}

#if PAN_ARCH <= 5
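/* Emit the Midgard TILER section of the framebuffer descriptor. The hierarchy
 * mask and polygon list sizes are derived from the framebuffer dimensions;
 * when the tiler is disabled, a minimal header-only polygon list is still
 * programmed. */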
static void
pan_emit_midgard_tiler(const struct panfrost_device *dev,
                       const struct pan_fb_info *fb,
                       const struct pan_tiler_context *tiler_ctx,
                       void *out)
{
        bool hierarchy = !dev->model->quirks.no_hierarchical_tiling;

        assert(tiler_ctx->midgard.polygon_list->ptr.gpu);

        pan_pack(out, TILER_CONTEXT, cfg) {
                unsigned header_size;

                if (tiler_ctx->midgard.disable) {
                        cfg.hierarchy_mask =
                                hierarchy ?
                                MALI_MIDGARD_TILER_DISABLED :
                                MALI_MIDGARD_TILER_USER;
                        header_size = MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE;
                        cfg.polygon_list_size = header_size + (hierarchy ? 0 : 4);
                        cfg.heap_start = tiler_ctx->midgard.polygon_list->ptr.gpu;
                        cfg.heap_end = tiler_ctx->midgard.polygon_list->ptr.gpu;
                } else {
                        cfg.hierarchy_mask =
                                panfrost_choose_hierarchy_mask(fb->width,
                                                               fb->height,
                                                               1, hierarchy);
                        header_size = panfrost_tiler_header_size(fb->width,
                                                                 fb->height,
                                                                 cfg.hierarchy_mask,
                                                                 hierarchy);
                        cfg.polygon_list_size =
                                panfrost_tiler_full_size(fb->width, fb->height,
                                                         cfg.hierarchy_mask,
                                                         hierarchy);
                        cfg.heap_start = dev->tiler_heap->ptr.gpu;
                        cfg.heap_end = dev->tiler_heap->ptr.gpu + dev->tiler_heap->size;
                }

                cfg.polygon_list = tiler_ctx->midgard.polygon_list->ptr.gpu;
                cfg.polygon_list_body = cfg.polygon_list + header_size;
        }
}
#endif

#if PAN_ARCH >= 5
static void
pan_emit_rt(const struct pan_fb_info *fb,
            unsigned idx, unsigned cbuf_offset, void *out)
{
        pan_pack(out, RENDER_TARGET, cfg) {
                pan_prepare_rt(fb, idx, cbuf_offset, &cfg);
        }
}

#if PAN_ARCH >= 6
/* All Bifrost and Valhall GPUs are affected by issue TSIX-2033:
 *
 *      Forcing clean_tile_writes breaks INTERSECT readbacks
 *
 * To work around this, use the frame shader mode ALWAYS instead of INTERSECT
 * when clean tile writes are forced. Since INTERSECT is a hint that the
 * hardware may ignore, this cannot affect correctness, only performance */

static enum mali_pre_post_frame_shader_mode
pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode, bool force_clean_tile)
{
        if (force_clean_tile && mode == MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT)
                return MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS;
        else
                return mode;
}

/* Regardless of clean_tile_write_enable, the hardware writes clean tiles if
 * the effective tile size differs from the superblock size of any enabled AFBC
 * render target. Check this condition. */

static bool
pan_force_clean_write_rt(const struct pan_image_view *rt, unsigned tile_size)
{
        if (!drm_is_afbc(rt->image->layout.modifier))
                return false;

        unsigned superblock = panfrost_afbc_superblock_width(rt->image->layout.modifier);

        assert(superblock >= 16);
        assert(tile_size <= 16*16);

        /* Tile size and superblock differ unless they are both 16x16 */
        return !(superblock == 16 && tile_size == 16*16);
}

static bool
pan_force_clean_write(const struct pan_fb_info *fb, unsigned tile_size)
{
        /* Maximum tile size */
        assert(tile_size <= 16*16);

        for (unsigned i = 0; i < fb->rt_count; ++i) {
                if (fb->rts[i].view && !fb->rts[i].discard &&
                    pan_force_clean_write_rt(fb->rts[i].view, tile_size))
                        return true;
        }

        if (fb->zs.view.zs && !fb->zs.discard.z &&
            pan_force_clean_write_rt(fb->zs.view.zs, tile_size))
                return true;

        if (fb->zs.view.s && !fb->zs.discard.s &&
            pan_force_clean_write_rt(fb->zs.view.s, tile_size))
                return true;

        return false;
}

#endif

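/* Emit a multi-target framebuffer descriptor (MFBD): the FRAMEBUFFER section,
 * an optional ZS/CRC extension, and one RENDER_TARGET descriptor per render
 * target. Returns the tag bits to OR into the fragment job's framebuffer
 * pointer. */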
unsigned
GENX(pan_emit_fbd)(const struct panfrost_device *dev,
                   const struct pan_fb_info *fb,
                   const struct pan_tls_info *tls,
                   const struct pan_tiler_context *tiler_ctx,
                   void *out)
{
        unsigned tags = MALI_FBD_TAG_IS_MFBD;
        void *fbd = out;
        void *rtd = out + pan_size(FRAMEBUFFER);

#if PAN_ARCH <= 5
        GENX(pan_emit_tls)(tls,
                           pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE));
#endif

        unsigned bytes_per_pixel = pan_cbuf_bytes_per_pixel(fb);
        unsigned tile_size = pan_select_max_tile_size(dev->optimal_tib_size,
                                                      bytes_per_pixel);

        /* Clamp tile size to hardware limits */
        tile_size = MIN2(tile_size, 16 * 16);
        assert(tile_size >= 4 * 4);

        /* Colour buffer allocations must be 1K aligned. */
        unsigned cbuf_allocation = ALIGN_POT(bytes_per_pixel * tile_size, 1024);
        assert(cbuf_allocation <= dev->optimal_tib_size && "tile too big");

        int crc_rt = GENX(pan_select_crc_rt)(fb, tile_size);
        bool has_zs_crc_ext = (fb->zs.view.zs || fb->zs.view.s || crc_rt >= 0);

        pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
#if PAN_ARCH >= 6
                bool force_clean_write = pan_force_clean_write(fb, tile_size);

                cfg.sample_locations =
                        panfrost_sample_positions(dev, pan_sample_pattern(fb->nr_samples));
                cfg.pre_frame_0 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[0], force_clean_write);
                cfg.pre_frame_1 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[1], force_clean_write);
                cfg.post_frame  = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[2], force_clean_write);
                cfg.frame_shader_dcds = fb->bifrost.pre_post.dcds.gpu;
                cfg.tiler = tiler_ctx->bifrost;
#endif
                cfg.width = fb->width;
                cfg.height = fb->height;
                cfg.bound_max_x = fb->width - 1;
                cfg.bound_max_y = fb->height - 1;

                cfg.effective_tile_size = tile_size;
                cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
                cfg.render_target_count = MAX2(fb->rt_count, 1);

                /* Default to 24 bit depth if there's no surface. */
                cfg.z_internal_format =
                        fb->zs.view.zs ?
                        panfrost_get_z_internal_format(fb->zs.view.zs->format) :
                        MALI_Z_INTERNAL_FORMAT_D24;

                cfg.z_clear = fb->zs.clear_value.depth;
                cfg.s_clear = fb->zs.clear_value.stencil;
                cfg.color_buffer_allocation = cbuf_allocation;
                cfg.sample_count = fb->nr_samples;
                cfg.sample_pattern = pan_sample_pattern(fb->nr_samples);
                cfg.z_write_enable = (fb->zs.view.zs && !fb->zs.discard.z);
                cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s);
                cfg.has_zs_crc_extension = has_zs_crc_ext;

                if (crc_rt >= 0) {
                        bool *valid = fb->rts[crc_rt].crc_valid;
                        bool full = !fb->extent.minx && !fb->extent.miny &&
                                    fb->extent.maxx == (fb->width - 1) &&
                                    fb->extent.maxy == (fb->height - 1);

                        cfg.crc_read_enable = *valid;

                        /* If the data is currently invalid, still write CRC
                         * data if we are doing a full write, so that it is
                         * valid for next time. */
                        cfg.crc_write_enable = *valid || full;

                        *valid |= full;
                }

#if PAN_ARCH >= 9
                cfg.point_sprite_coord_origin_max_y = fb->sprite_coord_origin;
                cfg.first_provoking_vertex = fb->first_provoking_vertex;
#endif
        }

#if PAN_ARCH >= 6
        pan_section_pack(fbd, FRAMEBUFFER, PADDING, padding);
#else
        pan_emit_midgard_tiler(dev, fb, tiler_ctx,
                               pan_section_ptr(fbd, FRAMEBUFFER, TILER));

        /* All weights set to 0, nothing to do here */
        pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w);
#endif

        if (has_zs_crc_ext) {
                pan_emit_zs_crc_ext(fb, crc_rt,
                                    out + pan_size(FRAMEBUFFER));
                rtd += pan_size(ZS_CRC_EXTENSION);
                tags |= MALI_FBD_TAG_HAS_ZS_RT;
        }

        unsigned rt_count = MAX2(fb->rt_count, 1);
        unsigned cbuf_offset = 0;
        for (unsigned i = 0; i < rt_count; i++) {
                pan_emit_rt(fb, i, cbuf_offset, rtd);
                rtd += pan_size(RENDER_TARGET);
                if (!fb->rts[i].view)
                        continue;

                cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) *
                               tile_size * fb->rts[i].view->image->layout.nr_samples;

                if (i != crc_rt)
                        *(fb->rts[i].crc_valid) = false;
        }
        tags |= MALI_POSITIVE(MAX2(fb->rt_count, 1)) << 2;

        return tags;
}
#else /* PAN_ARCH == 4 */
unsigned
GENX(pan_emit_fbd)(const struct panfrost_device *dev,
                   const struct pan_fb_info *fb,
                   const struct pan_tls_info *tls,
                   const struct pan_tiler_context *tiler_ctx,
                   void *fbd)
{
        assert(fb->rt_count <= 1);

        GENX(pan_emit_tls)(tls,
                           pan_section_ptr(fbd, FRAMEBUFFER,
                                           LOCAL_STORAGE));
        pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
                cfg.bound_max_x = fb->width - 1;
                cfg.bound_max_y = fb->height - 1;
                cfg.dithering_enable = true;
                cfg.clean_pixel_write_enable = true;
                cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
                if (fb->rts[0].clear) {
                        cfg.clear_color_0 = fb->rts[0].clear_value[0];
                        cfg.clear_color_1 = fb->rts[0].clear_value[1];
                        cfg.clear_color_2 = fb->rts[0].clear_value[2];
                        cfg.clear_color_3 = fb->rts[0].clear_value[3];
                }

                if (fb->zs.clear.z)
                        cfg.z_clear = fb->zs.clear_value.depth;

                if (fb->zs.clear.s)
                        cfg.s_clear = fb->zs.clear_value.stencil;

                if (fb->rt_count && fb->rts[0].view) {
                        const struct pan_image_view *rt = fb->rts[0].view;

                        const struct util_format_description *desc =
                                util_format_description(rt->format);

                        /* The swizzle for rendering is inverted from texturing */
                        unsigned char swizzle[4];
                        panfrost_invert_swizzle(desc->swizzle, swizzle);
                        cfg.swizzle = panfrost_translate_swizzle_4(swizzle);

                        struct pan_blendable_format fmt = panfrost_blendable_formats_v7[rt->format];
                        if (fmt.internal) {
                                cfg.internal_format = fmt.internal;
                                cfg.color_writeback_format = fmt.writeback;
                        } else {
                                unreachable("raw formats not finished for SFBD");
                        }

                        unsigned level = rt->first_level;
                        struct pan_surface surf;

                        pan_iview_get_surface(rt, 0, 0, 0, &surf);

                        cfg.color_write_enable = !fb->rts[0].discard;
                        cfg.color_writeback.base = surf.data;
                        cfg.color_writeback.row_stride =
                                rt->image->layout.slices[level].row_stride;

                        cfg.color_block_format = mod_to_block_fmt(rt->image->layout.modifier);
                        assert(cfg.color_block_format == MALI_BLOCK_FORMAT_LINEAR ||
                               cfg.color_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED);

                        if (rt->image->layout.crc_mode != PAN_IMAGE_CRC_NONE) {
                                const struct pan_image_slice_layout *slice =
                                        &rt->image->layout.slices[level];

                                cfg.crc_buffer.row_stride = slice->crc.stride;
                                if (rt->image->layout.crc_mode == PAN_IMAGE_CRC_INBAND) {
                                        cfg.crc_buffer.base = rt->image->data.bo->ptr.gpu +
                                                              rt->image->data.offset +
                                                              slice->crc.offset;
                                } else {
                                        cfg.crc_buffer.base = rt->image->crc.bo->ptr.gpu +
                                                              rt->image->crc.offset +
                                                              slice->crc.offset;
                                }
                        }
                }

                if (fb->zs.view.zs) {
                        const struct pan_image_view *zs = fb->zs.view.zs;
                        unsigned level = zs->first_level;
                        struct pan_surface surf;

                        pan_iview_get_surface(zs, 0, 0, 0, &surf);

                        cfg.zs_write_enable = !fb->zs.discard.z;
                        cfg.zs_writeback.base = surf.data;
                        cfg.zs_writeback.row_stride =
                                zs->image->layout.slices[level].row_stride;
                        cfg.zs_block_format = mod_to_block_fmt(zs->image->layout.modifier);
                        assert(cfg.zs_block_format == MALI_BLOCK_FORMAT_LINEAR ||
                               cfg.zs_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED);

                        cfg.zs_format = translate_zs_format(zs->format);
                }

                cfg.sample_count = fb->nr_samples;

                if (fb->rt_count)
                        cfg.msaa = mali_sampling_mode(fb->rts[0].view);
        }

        pan_emit_midgard_tiler(dev, fb, tiler_ctx,
                               pan_section_ptr(fbd, FRAMEBUFFER, TILER));

        /* All weights set to 0, nothing to do here */
        pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w);

        pan_section_pack(fbd, FRAMEBUFFER, PADDING_1, padding);
        pan_section_pack(fbd, FRAMEBUFFER, PADDING_2, padding);
        return 0;
}
#endif

#if PAN_ARCH >= 6
void
GENX(pan_emit_tiler_heap)(const struct panfrost_device *dev,
                          void *out)
{
        pan_pack(out, TILER_HEAP, heap) {
                heap.size = dev->tiler_heap->size;
                heap.base = dev->tiler_heap->ptr.gpu;
                heap.bottom = dev->tiler_heap->ptr.gpu;
                heap.top = dev->tiler_heap->ptr.gpu + dev->tiler_heap->size;
        }
}

void
GENX(pan_emit_tiler_ctx)(const struct panfrost_device *dev,
                         unsigned fb_width, unsigned fb_height,
                         unsigned nr_samples,
                         bool first_provoking_vertex,
                         mali_ptr heap,
                         void *out)
{
        unsigned max_levels = dev->tiler_features.max_levels;
        assert(max_levels >= 2);

        pan_pack(out, TILER_CONTEXT, tiler) {
                /* TODO: Select hierarchy mask more effectively */
                tiler.hierarchy_mask = (max_levels >= 8) ? 0xFF : 0x28;

                /* For large framebuffers, disable the smallest bin size to
                 * avoid pathological tiler memory usage. Required to avoid OOM
                 * on dEQP-GLES31.functional.fbo.no_attachments.maximums.all on
                 * Mali-G57.
                 */
                if (MAX2(fb_width, fb_height) >= 4096)
                        tiler.hierarchy_mask &= ~1;

                tiler.fb_width = fb_width;
                tiler.fb_height = fb_height;
                tiler.heap = heap;
                tiler.sample_pattern = pan_sample_pattern(nr_samples);
#if PAN_ARCH >= 9
                tiler.first_provoking_vertex = first_provoking_vertex;
#endif
        }
}
#endif

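/* Emit a FRAGMENT job pointing at the framebuffer descriptor. The bounding box
 * is expressed in tile rather than pixel coordinates, hence the
 * MALI_TILE_SHIFT shifts. */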
void
GENX(pan_emit_fragment_job)(const struct pan_fb_info *fb,
                            mali_ptr fbd,
                            void *out)
{
        pan_section_pack(out, FRAGMENT_JOB, HEADER, header) {
                header.type = MALI_JOB_TYPE_FRAGMENT;
                header.index = 1;
        }

        pan_section_pack(out, FRAGMENT_JOB, PAYLOAD, payload) {
                payload.bound_min_x = fb->extent.minx >> MALI_TILE_SHIFT;
                payload.bound_min_y = fb->extent.miny >> MALI_TILE_SHIFT;
                payload.bound_max_x = fb->extent.maxx >> MALI_TILE_SHIFT;
                payload.bound_max_y = fb->extent.maxy >> MALI_TILE_SHIFT;
                payload.framebuffer = fbd;

#if PAN_ARCH >= 5
                if (fb->tile_map.base) {
                        payload.has_tile_enable_map = true;
                        payload.tile_enable_map = fb->tile_map.base;
                        payload.tile_enable_map_row_stride = fb->tile_map.stride;
                }
#endif
        }
}