1 /*
2 * Copyright (C) 2021 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 * Boris Brezillon <boris.brezillon@collabora.com>
26 */
27
28 #include "util/macros.h"
29
30 #include "genxml/gen_macros.h"
31
32 #include "pan_desc.h"
33 #include "pan_encoder.h"
34 #include "pan_props.h"
35 #include "pan_texture.h"
36
37 static unsigned
mod_to_block_fmt(uint64_t mod)38 mod_to_block_fmt(uint64_t mod)
39 {
40 switch (mod) {
41 case DRM_FORMAT_MOD_LINEAR:
42 return MALI_BLOCK_FORMAT_LINEAR;
43 case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED:
44 return MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
45 default:
46 #if PAN_ARCH >= 5
47 if (drm_is_afbc(mod) && !(mod & AFBC_FORMAT_MOD_TILED))
48 return MALI_BLOCK_FORMAT_AFBC;
49 #endif
50 #if PAN_ARCH >= 7
51 if (drm_is_afbc(mod) && (mod & AFBC_FORMAT_MOD_TILED))
52 return MALI_BLOCK_FORMAT_AFBC_TILED;
53 #endif
54 #if PAN_ARCH >= 10
55 if (drm_is_afrc(mod))
56 return 0; /* Reserved field for AFRC state */
57 #endif
58
59 unreachable("Unsupported modifer");
60 }
61 }
62
63 static enum mali_msaa
mali_sampling_mode(const struct pan_image_view * view)64 mali_sampling_mode(const struct pan_image_view *view)
65 {
66 unsigned nr_samples = pan_image_view_get_nr_samples(view);
67
68 if (nr_samples > 1) {
69 ASSERTED const struct pan_image *first_plane =
70 pan_image_view_get_first_plane(view);
71
72 assert(view->nr_samples == nr_samples);
73 assert(first_plane->layout.slices[0].surface_stride != 0);
74 return MALI_MSAA_LAYERED;
75 }
76
77 if (view->nr_samples > nr_samples) {
78 assert(nr_samples == 1);
79 return MALI_MSAA_AVERAGE;
80 }
81
82 assert(view->nr_samples == nr_samples);
83 assert(view->nr_samples == 1);
84
85 return MALI_MSAA_SINGLE;
86 }
87
88 static bool
renderblock_fits_in_single_pass(const struct pan_image_view * view,unsigned tile_size)89 renderblock_fits_in_single_pass(const struct pan_image_view *view,
90 unsigned tile_size)
91 {
92 const struct pan_image *first_plane = pan_image_view_get_first_plane(view);
93 uint64_t mod = first_plane->layout.modifier;
94
95 if (!drm_is_afbc(mod))
96 return tile_size >= 16 * 16;
97
98 struct pan_block_size renderblk_sz = panfrost_afbc_renderblock_size(mod);
99 return tile_size >= renderblk_sz.width * renderblk_sz.height;
100 }
101
102 int
GENX(pan_select_crc_rt)103 GENX(pan_select_crc_rt)(const struct pan_fb_info *fb, unsigned tile_size)
104 {
105 /* Disable CRC when the tile size is smaller than 16x16. In the hardware,
106 * CRC tiles are the same size as the tiles of the framebuffer. However,
107 * our code only handles 16x16 tiles. Therefore under the current
108 * implementation, we must disable CRC when 16x16 tiles are not used.
109 *
110 * This may hurt performance. However, smaller tile sizes are rare, and
111 * CRCs are more expensive at smaller tile sizes, reducing the benefit.
112 * Restricting CRC to 16x16 should work in practice.
113 */
114 if (tile_size < 16 * 16)
115 return -1;
116
117 #if PAN_ARCH <= 6
118 if (fb->rt_count == 1 && fb->rts[0].view && !fb->rts[0].discard &&
119 pan_image_view_has_crc(fb->rts[0].view))
120 return 0;
121
122 return -1;
123 #else
124 bool best_rt_valid = false;
125 int best_rt = -1;
126
127 for (unsigned i = 0; i < fb->rt_count; i++) {
128 if (!fb->rts[i].view || fb->rts[i].discard ||
129 !pan_image_view_has_crc(fb->rts[i].view))
130 continue;
131
132 if (!renderblock_fits_in_single_pass(fb->rts[i].view, tile_size))
133 continue;
134
135 bool valid = *(fb->rts[i].crc_valid);
136 bool full = !fb->extent.minx && !fb->extent.miny &&
137 fb->extent.maxx == (fb->width - 1) &&
138 fb->extent.maxy == (fb->height - 1);
139 if (!full && !valid)
140 continue;
141
142 if (best_rt < 0 || (valid && !best_rt_valid)) {
143 best_rt = i;
144 best_rt_valid = valid;
145 }
146
147 if (valid)
148 break;
149 }
150
151 return best_rt;
152 #endif
153 }
154
155 static enum mali_zs_format
translate_zs_format(enum pipe_format in)156 translate_zs_format(enum pipe_format in)
157 {
158 switch (in) {
159 case PIPE_FORMAT_Z16_UNORM:
160 return MALI_ZS_FORMAT_D16;
161 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
162 return MALI_ZS_FORMAT_D24S8;
163 case PIPE_FORMAT_Z24X8_UNORM:
164 return MALI_ZS_FORMAT_D24X8;
165 case PIPE_FORMAT_Z32_FLOAT:
166 return MALI_ZS_FORMAT_D32;
167 #if PAN_ARCH <= 7
168 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
169 return MALI_ZS_FORMAT_D32_S8X24;
170 #endif
171 default:
172 unreachable("Unsupported depth/stencil format.");
173 }
174 }
175
176 #if PAN_ARCH >= 5
177 static enum mali_s_format
translate_s_format(enum pipe_format in)178 translate_s_format(enum pipe_format in)
179 {
180 switch (in) {
181 case PIPE_FORMAT_S8_UINT:
182 return MALI_S_FORMAT_S8;
183 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
184 case PIPE_FORMAT_X24S8_UINT:
185 return MALI_S_FORMAT_X24S8;
186
187 #if PAN_ARCH <= 7
188 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
189 case PIPE_FORMAT_S8X24_UINT:
190 return MALI_S_FORMAT_S8X24;
191 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
192 case PIPE_FORMAT_X32_S8X24_UINT:
193 return MALI_S_FORMAT_X32_S8X24;
194 #endif
195
196 default:
197 unreachable("Unsupported stencil format.");
198 }
199 }
200
201 static void
pan_prepare_s(const struct pan_fb_info * fb,unsigned layer_idx,struct MALI_ZS_CRC_EXTENSION * ext)202 pan_prepare_s(const struct pan_fb_info *fb, unsigned layer_idx,
203 struct MALI_ZS_CRC_EXTENSION *ext)
204 {
205 const struct pan_image_view *s = fb->zs.view.s;
206
207 if (!s)
208 return;
209
210 const struct pan_image *image = pan_image_view_get_s_plane(s);
211 unsigned level = s->first_level;
212
213 ext->s_msaa = mali_sampling_mode(s);
214
215 struct pan_surface surf;
216 pan_iview_get_surface(s, 0, layer_idx, 0, &surf);
217
218 assert(image->layout.modifier ==
219 DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
220 image->layout.modifier == DRM_FORMAT_MOD_LINEAR);
221 ext->s_writeback_base = surf.data;
222 ext->s_writeback_row_stride = image->layout.slices[level].row_stride;
223 ext->s_writeback_surface_stride =
224 (pan_image_view_get_nr_samples(s) > 1)
225 ? image->layout.slices[level].surface_stride
226 : 0;
227 ext->s_block_format = mod_to_block_fmt(image->layout.modifier);
228 ext->s_write_format = translate_s_format(s->format);
229 }
230
231 static void
pan_prepare_zs(const struct pan_fb_info * fb,unsigned layer_idx,struct MALI_ZS_CRC_EXTENSION * ext)232 pan_prepare_zs(const struct pan_fb_info *fb, unsigned layer_idx,
233 struct MALI_ZS_CRC_EXTENSION *ext)
234 {
235 const struct pan_image_view *zs = fb->zs.view.zs;
236
237 if (!zs)
238 return;
239
240 const struct pan_image *image = pan_image_view_get_zs_plane(zs);
241 unsigned level = zs->first_level;
242
243 ext->zs_msaa = mali_sampling_mode(zs);
244
245 struct pan_surface surf;
246 pan_iview_get_surface(zs, 0, layer_idx, 0, &surf);
247 UNUSED const struct pan_image_slice_layout *slice =
248 &image->layout.slices[level];
249
250 if (drm_is_afbc(image->layout.modifier)) {
251 #if PAN_ARCH >= 9
252 ext->zs_writeback_base = surf.afbc.header;
253 ext->zs_writeback_row_stride = slice->row_stride;
254 /* TODO: surface stride? */
255 ext->zs_afbc_body_offset = surf.afbc.body - surf.afbc.header;
256
257 /* TODO: stencil AFBC? */
258 #else
259 #if PAN_ARCH >= 6
260 ext->zs_afbc_row_stride =
261 pan_afbc_stride_blocks(image->layout.modifier, slice->row_stride);
262 #else
263 ext->zs_block_format = MALI_BLOCK_FORMAT_AFBC;
264 ext->zs_afbc_body_size = 0x1000;
265 ext->zs_afbc_chunk_size = 9;
266 ext->zs_afbc_sparse = true;
267 #endif
268
269 ext->zs_afbc_header = surf.afbc.header;
270 ext->zs_afbc_body = surf.afbc.body;
271 #endif
272 } else {
273 assert(image->layout.modifier ==
274 DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
275 image->layout.modifier == DRM_FORMAT_MOD_LINEAR);
276
277 /* TODO: Z32F(S8) support, which is always linear */
278
279 ext->zs_writeback_base = surf.data;
280 ext->zs_writeback_row_stride = image->layout.slices[level].row_stride;
281 ext->zs_writeback_surface_stride =
282 (pan_image_view_get_nr_samples(zs) > 1)
283 ? image->layout.slices[level].surface_stride
284 : 0;
285 }
286
287 ext->zs_block_format = mod_to_block_fmt(image->layout.modifier);
288 ext->zs_write_format = translate_zs_format(zs->format);
289 if (ext->zs_write_format == MALI_ZS_FORMAT_D24S8)
290 ext->s_writeback_base = ext->zs_writeback_base;
291 }
292
293 static void
pan_prepare_crc(const struct pan_fb_info * fb,int rt_crc,struct MALI_ZS_CRC_EXTENSION * ext)294 pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc,
295 struct MALI_ZS_CRC_EXTENSION *ext)
296 {
297 if (rt_crc < 0)
298 return;
299
300 assert(rt_crc < fb->rt_count);
301
302 const struct pan_image_view *rt = fb->rts[rt_crc].view;
303 const struct pan_image *image = pan_image_view_get_color_plane(rt);
304 const struct pan_image_slice_layout *slice =
305 &image->layout.slices[rt->first_level];
306
307 ext->crc_base =
308 image->data.base + image->data.offset + slice->crc.offset;
309 ext->crc_row_stride = slice->crc.stride;
310
311 #if PAN_ARCH >= 7
312 ext->crc_render_target = rt_crc;
313
314 if (fb->rts[rt_crc].clear) {
315 uint32_t clear_val = fb->rts[rt_crc].clear_value[0];
316 ext->crc_clear_color = clear_val | 0xc000000000000000 |
317 (((uint64_t)clear_val & 0xffff) << 32);
318 }
319 #endif
320 }
321
322 static void
pan_emit_zs_crc_ext(const struct pan_fb_info * fb,unsigned layer_idx,int rt_crc,struct mali_zs_crc_extension_packed * zs_crc_ext)323 pan_emit_zs_crc_ext(const struct pan_fb_info *fb, unsigned layer_idx,
324 int rt_crc, struct mali_zs_crc_extension_packed *zs_crc_ext)
325 {
326 pan_pack(zs_crc_ext, ZS_CRC_EXTENSION, cfg) {
327 pan_prepare_crc(fb, rt_crc, &cfg);
328 cfg.zs_clean_pixel_write_enable = fb->zs.clear.z || fb->zs.clear.s;
329 pan_prepare_zs(fb, layer_idx, &cfg);
330 pan_prepare_s(fb, layer_idx, &cfg);
331 }
332 }
333
334 /* Measure format as it appears in the tile buffer */
335
336 static unsigned
pan_bytes_per_pixel_tib(enum pipe_format format)337 pan_bytes_per_pixel_tib(enum pipe_format format)
338 {
339 const struct pan_blendable_format *bf =
340 GENX(panfrost_blendable_format_from_pipe_format)(format);
341
342 if (bf->internal) {
343 /* Blendable formats are always 32-bits in the tile buffer,
344 * extra bits are used as padding or to dither */
345 return 4;
346 } else {
347 /* Non-blendable formats are raw, rounded up to the nearest
348 * power-of-two size */
349 unsigned bytes = util_format_get_blocksize(format);
350 return util_next_power_of_two(bytes);
351 }
352 }
353
354 static unsigned
pan_cbuf_bytes_per_pixel(const struct pan_fb_info * fb)355 pan_cbuf_bytes_per_pixel(const struct pan_fb_info *fb)
356 {
357 unsigned sum = 0;
358
359 for (int cb = 0; cb < fb->rt_count; ++cb) {
360 const struct pan_image_view *rt = fb->rts[cb].view;
361
362 if (!rt)
363 continue;
364
365 sum += pan_bytes_per_pixel_tib(rt->format) * rt->nr_samples;
366 }
367
368 return sum;
369 }
370
371 /*
372 * Select the largest tile size that fits within the tilebuffer budget.
373 * Formally, maximize (pixels per tile) such that it is a power of two and
374 *
375 * (bytes per pixel) (pixels per tile) <= (max bytes per tile)
376 *
377 * A bit of algebra gives the following formula.
378 *
379 * Calculate the color buffer allocation size as well.
380 */
381 void
GENX(pan_select_tile_size)382 GENX(pan_select_tile_size)(struct pan_fb_info *fb)
383 {
384 unsigned bytes_per_pixel;
385
386 assert(util_is_power_of_two_nonzero(fb->tile_buf_budget));
387 assert(fb->tile_buf_budget >= 1024);
388
389 bytes_per_pixel = pan_cbuf_bytes_per_pixel(fb);
390 fb->tile_size = fb->tile_buf_budget >> util_logbase2_ceil(bytes_per_pixel);
391
392 /* Clamp tile size to hardware limits */
393 fb->tile_size =
394 MIN2(fb->tile_size, panfrost_max_effective_tile_size(PAN_ARCH));
395 assert(fb->tile_size >= 4 * 4);
396
397 /* Colour buffer allocations must be 1K aligned. */
398 fb->cbuf_allocation = ALIGN_POT(bytes_per_pixel * fb->tile_size, 1024);
399 assert(fb->cbuf_allocation <= fb->tile_buf_budget && "tile too big");
400 }
401
402 static enum mali_color_format
pan_mfbd_raw_format(unsigned bits)403 pan_mfbd_raw_format(unsigned bits)
404 {
405 /* clang-format off */
406 switch (bits) {
407 case 8: return MALI_COLOR_FORMAT_RAW8;
408 case 16: return MALI_COLOR_FORMAT_RAW16;
409 case 24: return MALI_COLOR_FORMAT_RAW24;
410 case 32: return MALI_COLOR_FORMAT_RAW32;
411 case 48: return MALI_COLOR_FORMAT_RAW48;
412 case 64: return MALI_COLOR_FORMAT_RAW64;
413 case 96: return MALI_COLOR_FORMAT_RAW96;
414 case 128: return MALI_COLOR_FORMAT_RAW128;
415 case 192: return MALI_COLOR_FORMAT_RAW192;
416 case 256: return MALI_COLOR_FORMAT_RAW256;
417 case 384: return MALI_COLOR_FORMAT_RAW384;
418 case 512: return MALI_COLOR_FORMAT_RAW512;
419 case 768: return MALI_COLOR_FORMAT_RAW768;
420 case 1024: return MALI_COLOR_FORMAT_RAW1024;
421 case 1536: return MALI_COLOR_FORMAT_RAW1536;
422 case 2048: return MALI_COLOR_FORMAT_RAW2048;
423 default: unreachable("invalid raw bpp");
424 }
425 /* clang-format on */
426 }
427
428 static void
pan_rt_init_format(const struct pan_image_view * rt,struct MALI_RENDER_TARGET * cfg)429 pan_rt_init_format(const struct pan_image_view *rt,
430 struct MALI_RENDER_TARGET *cfg)
431 {
432 /* Explode details on the format */
433
434 const struct util_format_description *desc =
435 util_format_description(rt->format);
436
437 /* The swizzle for rendering is inverted from texturing */
438
439 unsigned char swizzle[4] = {
440 PIPE_SWIZZLE_X,
441 PIPE_SWIZZLE_Y,
442 PIPE_SWIZZLE_Z,
443 PIPE_SWIZZLE_W,
444 };
445
446 /* Fill in accordingly, defaulting to 8-bit UNORM */
447
448 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
449 cfg->srgb = true;
450
451 struct pan_blendable_format fmt =
452 *GENX(panfrost_blendable_format_from_pipe_format)(rt->format);
453 enum mali_color_format writeback_format;
454
455 if (fmt.internal) {
456 cfg->internal_format = fmt.internal;
457 writeback_format = fmt.writeback;
458 panfrost_invert_swizzle(desc->swizzle, swizzle);
459 } else {
460 /* Construct RAW internal/writeback, where internal is
461 * specified logarithmically (round to next power-of-two).
462 * Offset specified from RAW8, where 8 = 2^3 */
463
464 unsigned bits = desc->block.bits;
465 assert(bits >= 8 && bits <= 128);
466 unsigned offset = util_logbase2_ceil(bits) - 3;
467 assert(offset <= 4);
468
469 cfg->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW8 + offset;
470 writeback_format = pan_mfbd_raw_format(bits);
471 }
472
473 #if PAN_ARCH >= 10
474 const struct pan_image *image = pan_image_view_get_color_plane(rt);
475
476 if (drm_is_afrc(image->layout.modifier))
477 cfg->afrc.writeback_format = writeback_format;
478 else
479 cfg->writeback_format = writeback_format;
480 #else
481 cfg->writeback_format = writeback_format;
482 #endif
483
484 cfg->swizzle = panfrost_translate_swizzle_4(swizzle);
485 }
486
487 static void
pan_prepare_rt(const struct pan_fb_info * fb,unsigned layer_idx,unsigned rt_idx,unsigned cbuf_offset,struct MALI_RENDER_TARGET * cfg)488 pan_prepare_rt(const struct pan_fb_info *fb, unsigned layer_idx,
489 unsigned rt_idx, unsigned cbuf_offset,
490 struct MALI_RENDER_TARGET *cfg)
491 {
492 cfg->clean_pixel_write_enable = fb->rts[rt_idx].clear;
493 cfg->internal_buffer_offset = cbuf_offset;
494 if (fb->rts[rt_idx].clear) {
495 cfg->clear.color_0 = fb->rts[rt_idx].clear_value[0];
496 cfg->clear.color_1 = fb->rts[rt_idx].clear_value[1];
497 cfg->clear.color_2 = fb->rts[rt_idx].clear_value[2];
498 cfg->clear.color_3 = fb->rts[rt_idx].clear_value[3];
499 }
500
501 const struct pan_image_view *rt = fb->rts[rt_idx].view;
502 if (!rt || fb->rts[rt_idx].discard) {
503 cfg->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8;
504 cfg->internal_buffer_offset = cbuf_offset;
505 #if PAN_ARCH >= 7
506 cfg->writeback_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
507 cfg->dithering_enable = true;
508 #endif
509 return;
510 }
511
512 const struct pan_image *image = pan_image_view_get_color_plane(rt);
513
514 if (!drm_is_afrc(image->layout.modifier))
515 cfg->write_enable = true;
516
517 cfg->dithering_enable = true;
518
519 const struct pan_image *first_plane = pan_image_view_get_first_plane(rt);
520 unsigned level = rt->first_level;
521 ASSERTED unsigned layer_count = rt->dim == MALI_TEXTURE_DIMENSION_3D
522 ? first_plane->layout.depth
523 : rt->last_layer - rt->first_layer + 1;
524
525 assert(rt->last_level == rt->first_level);
526 assert(layer_idx < layer_count);
527
528 int row_stride = image->layout.slices[level].row_stride;
529
530 /* Only set layer_stride for layered MSAA rendering */
531
532 unsigned layer_stride = (pan_image_view_get_nr_samples(rt) > 1)
533 ? image->layout.slices[level].surface_stride
534 : 0;
535
536 cfg->writeback_msaa = mali_sampling_mode(rt);
537
538 pan_rt_init_format(rt, cfg);
539
540 cfg->writeback_block_format = mod_to_block_fmt(image->layout.modifier);
541
542 struct pan_surface surf;
543 pan_iview_get_surface(rt, 0, layer_idx, 0, &surf);
544
545 if (drm_is_afbc(image->layout.modifier)) {
546 #if PAN_ARCH >= 9
547 if (image->layout.modifier & AFBC_FORMAT_MOD_YTR)
548 cfg->afbc.yuv_transform = true;
549
550 cfg->afbc.wide_block = panfrost_afbc_is_wide(image->layout.modifier);
551 cfg->afbc.split_block =
552 (image->layout.modifier & AFBC_FORMAT_MOD_SPLIT);
553 cfg->afbc.header = surf.afbc.header;
554 cfg->afbc.body_offset = surf.afbc.body - surf.afbc.header;
555 assert(surf.afbc.body >= surf.afbc.header);
556
557 cfg->afbc.compression_mode = GENX(pan_afbc_compression_mode)(rt->format);
558 cfg->afbc.row_stride = row_stride;
559 #else
560 const struct pan_image_slice_layout *slice = &image->layout.slices[level];
561
562 #if PAN_ARCH >= 6
563 cfg->afbc.row_stride =
564 pan_afbc_stride_blocks(image->layout.modifier, slice->row_stride);
565 cfg->afbc.afbc_wide_block_enable =
566 panfrost_afbc_is_wide(image->layout.modifier);
567 cfg->afbc.afbc_split_block_enable =
568 (image->layout.modifier & AFBC_FORMAT_MOD_SPLIT);
569 #else
570 cfg->afbc.chunk_size = 9;
571 cfg->afbc.sparse = true;
572 cfg->afbc.body_size = slice->afbc.body_size;
573 #endif
574
575 cfg->afbc.header = surf.afbc.header;
576 cfg->afbc.body = surf.afbc.body;
577
578 if (image->layout.modifier & AFBC_FORMAT_MOD_YTR)
579 cfg->afbc.yuv_transform_enable = true;
580 #endif
581 #if PAN_ARCH >= 10
582 } else if (drm_is_afrc(image->layout.modifier)) {
583 struct pan_afrc_format_info finfo =
584 panfrost_afrc_get_format_info(image->layout.format);
585
586 cfg->writeback_mode = MALI_WRITEBACK_MODE_AFRC_RGB;
587 cfg->afrc.block_size =
588 GENX(pan_afrc_block_size)(image->layout.modifier, 0);
589 cfg->afrc.format =
590 GENX(pan_afrc_format)(finfo, image->layout.modifier, 0);
591
592 cfg->rgb.base = surf.data;
593 cfg->rgb.row_stride = row_stride;
594 cfg->rgb.surface_stride = layer_stride;
595 #endif
596 } else {
597 assert(image->layout.modifier == DRM_FORMAT_MOD_LINEAR ||
598 image->layout.modifier ==
599 DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED);
600 cfg->rgb.base = surf.data;
601 cfg->rgb.row_stride = row_stride;
602 cfg->rgb.surface_stride = layer_stride;
603 }
604 }
605 #endif
606
607 void
GENX(pan_emit_tls)608 GENX(pan_emit_tls)(const struct pan_tls_info *info,
609 struct mali_local_storage_packed *out)
610 {
611 pan_pack(out, LOCAL_STORAGE, cfg) {
612 if (info->tls.size) {
613 unsigned shift = panfrost_get_stack_shift(info->tls.size);
614
615 cfg.tls_size = shift;
616 #if PAN_ARCH >= 9
617 /* For now, always use packed TLS addressing. This is
618 * better for the cache and requires no fix up code in
619 * the shader. We may need to revisit this someday for
620 * OpenCL generic pointer support.
621 */
622 cfg.tls_address_mode = MALI_ADDRESS_MODE_PACKED;
623
624 assert((info->tls.ptr & 4095) == 0);
625 cfg.tls_base_pointer = info->tls.ptr >> 8;
626 #else
627 cfg.tls_base_pointer = info->tls.ptr;
628 #endif
629 }
630
631 if (info->wls.size) {
632 assert(!(info->wls.ptr & 4095));
633 assert((info->wls.ptr & 0xffffffff00000000ULL) ==
634 ((info->wls.ptr + info->wls.size - 1) & 0xffffffff00000000ULL));
635 cfg.wls_base_pointer = info->wls.ptr;
636 unsigned wls_size = pan_wls_adjust_size(info->wls.size);
637 cfg.wls_instances = info->wls.instances;
638 cfg.wls_size_scale = util_logbase2(wls_size) + 1;
639 } else {
640 cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
641 }
642 }
643 }
644
645 #if PAN_ARCH <= 5
646 static void
pan_emit_midgard_tiler(const struct pan_fb_info * fb,const struct pan_tiler_context * tiler_ctx,struct mali_tiler_context_packed * out)647 pan_emit_midgard_tiler(const struct pan_fb_info *fb,
648 const struct pan_tiler_context *tiler_ctx,
649 struct mali_tiler_context_packed *out)
650 {
651 bool hierarchy = !tiler_ctx->midgard.no_hierarchical_tiling;
652
653 assert(tiler_ctx->midgard.polygon_list);
654
655 pan_pack(out, TILER_CONTEXT, cfg) {
656 unsigned header_size;
657
658 if (tiler_ctx->midgard.disable) {
659 cfg.hierarchy_mask =
660 hierarchy ? MALI_MIDGARD_TILER_DISABLED : MALI_MIDGARD_TILER_USER;
661 header_size = MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE;
662 cfg.polygon_list_size = header_size + (hierarchy ? 0 : 4);
663 cfg.heap_start = tiler_ctx->midgard.polygon_list;
664 cfg.heap_end = tiler_ctx->midgard.polygon_list;
665 } else {
666 cfg.hierarchy_mask = panfrost_choose_hierarchy_mask(
667 fb->width, fb->height, tiler_ctx->midgard.vertex_count, hierarchy);
668 header_size = panfrost_tiler_header_size(
669 fb->width, fb->height, cfg.hierarchy_mask, hierarchy);
670 cfg.polygon_list_size = panfrost_tiler_full_size(
671 fb->width, fb->height, cfg.hierarchy_mask, hierarchy);
672 cfg.heap_start = tiler_ctx->midgard.heap.start;
673 cfg.heap_end = cfg.heap_start + tiler_ctx->midgard.heap.size;
674 }
675
676 cfg.polygon_list = tiler_ctx->midgard.polygon_list;
677 cfg.polygon_list_body = cfg.polygon_list + header_size;
678 }
679 }
680 #endif
681
682 #if PAN_ARCH >= 5
683 static void
pan_emit_rt(const struct pan_fb_info * fb,unsigned layer_idx,unsigned idx,unsigned cbuf_offset,struct mali_render_target_packed * out)684 pan_emit_rt(const struct pan_fb_info *fb, unsigned layer_idx, unsigned idx,
685 unsigned cbuf_offset, struct mali_render_target_packed *out)
686 {
687 pan_pack(out, RENDER_TARGET, cfg) {
688 pan_prepare_rt(fb, layer_idx, idx, cbuf_offset, &cfg);
689 }
690 }
691
692 #if PAN_ARCH >= 6
693 /* All Bifrost and Valhall GPUs are affected by issue TSIX-2033:
694 *
695 * Forcing clean_tile_writes breaks INTERSECT readbacks
696 *
697 * To workaround, use the frame shader mode ALWAYS instead of INTERSECT if
698 * clean tile writes is forced. Since INTERSECT is a hint that the hardware may
699 * ignore, this cannot affect correctness, only performance */
700
701 static enum mali_pre_post_frame_shader_mode
pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode,bool force_clean_tile)702 pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode,
703 bool force_clean_tile)
704 {
705 if (force_clean_tile && mode == MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT)
706 return MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS;
707 else
708 return mode;
709 }
710
711 /* Regardless of clean_tile_write_enable, the hardware writes clean tiles if
712 * the effective tile size differs from the superblock size of any enabled AFBC
713 * render target. Check this condition. */
714
715 static bool
pan_force_clean_write_on(const struct pan_image * image,unsigned tile_size)716 pan_force_clean_write_on(const struct pan_image *image, unsigned tile_size)
717 {
718 if (!image)
719 return false;
720
721 if (!drm_is_afbc(image->layout.modifier))
722 return false;
723
724 struct pan_block_size renderblk_sz =
725 panfrost_afbc_renderblock_size(image->layout.modifier);
726
727 assert(renderblk_sz.width >= 16 && renderblk_sz.height >= 16);
728 assert(tile_size <= panfrost_max_effective_tile_size(PAN_ARCH));
729
730 return tile_size != renderblk_sz.width * renderblk_sz.height;
731 }
732
733 static bool
pan_force_clean_write(const struct pan_fb_info * fb,unsigned tile_size)734 pan_force_clean_write(const struct pan_fb_info *fb, unsigned tile_size)
735 {
736 /* Maximum tile size */
737 assert(tile_size <= panfrost_max_effective_tile_size(PAN_ARCH));
738
739 for (unsigned i = 0; i < fb->rt_count; ++i) {
740 if (!fb->rts[i].view || fb->rts[i].discard)
741 continue;
742
743 const struct pan_image *img =
744 pan_image_view_get_color_plane(fb->rts[i].view);
745
746 if (pan_force_clean_write_on(img, tile_size))
747 return true;
748 }
749
750 if (fb->zs.view.zs && !fb->zs.discard.z &&
751 pan_force_clean_write_on(pan_image_view_get_zs_plane(fb->zs.view.zs),
752 tile_size))
753 return true;
754
755 if (fb->zs.view.s && !fb->zs.discard.s &&
756 pan_force_clean_write_on(pan_image_view_get_s_plane(fb->zs.view.s),
757 tile_size))
758 return true;
759
760 return false;
761 }
762
763 #endif
764
765 unsigned
GENX(pan_emit_fbd)766 GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
767 const struct pan_tls_info *tls,
768 const struct pan_tiler_context *tiler_ctx, void *out)
769 {
770 void *fbd = out;
771 void *rtd = out + pan_size(FRAMEBUFFER);
772
773 #if PAN_ARCH <= 5
774 GENX(pan_emit_tls)(tls, pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE));
775 #endif
776
777 int crc_rt = GENX(pan_select_crc_rt)(fb, fb->tile_size);
778 bool has_zs_crc_ext = (fb->zs.view.zs || fb->zs.view.s || crc_rt >= 0);
779
780 pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
781 #if PAN_ARCH >= 6
782 bool force_clean_write = pan_force_clean_write(fb, fb->tile_size);
783
784 cfg.sample_locations = fb->sample_positions;
785 cfg.pre_frame_0 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[0],
786 force_clean_write);
787 cfg.pre_frame_1 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[1],
788 force_clean_write);
789 cfg.post_frame = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[2],
790 force_clean_write);
791 #if PAN_ARCH <= 7
792 /* On Bifrost, the layer_id is passed through a push_uniform, which forces
793 * us to have one pre/post DCD array per layer. */
794 cfg.frame_shader_dcds =
795 fb->bifrost.pre_post.dcds.gpu + (layer_idx * 3 * pan_size(DRAW));
796 #else
797 /* On Valhall, layer_id is passed through the framebuffer frame_arg, which
798 * is preloaded in r62, so we can use the same pre/post DCD array for all
799 * layers. */
800 cfg.frame_shader_dcds = fb->bifrost.pre_post.dcds.gpu;
801 #endif
802 cfg.tiler =
803 PAN_ARCH >= 9 ? tiler_ctx->valhall.desc : tiler_ctx->bifrost.desc;
804 #endif
805 cfg.width = fb->width;
806 cfg.height = fb->height;
807 cfg.bound_max_x = fb->width - 1;
808 cfg.bound_max_y = fb->height - 1;
809
810 cfg.effective_tile_size = fb->tile_size;
811 cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
812 cfg.render_target_count = MAX2(fb->rt_count, 1);
813
814 /* Default to 24 bit depth if there's no surface. */
815 cfg.z_internal_format =
816 fb->zs.view.zs ? panfrost_get_z_internal_format(fb->zs.view.zs->format)
817 : MALI_Z_INTERNAL_FORMAT_D24;
818
819 cfg.z_clear = fb->zs.clear_value.depth;
820 cfg.s_clear = fb->zs.clear_value.stencil;
821 cfg.color_buffer_allocation = fb->cbuf_allocation;
822
823 /* The force_samples setting dictates the sample-count that is used
824 * for rasterization, and works like D3D11's ForcedSampleCount feature:
825 *
826 * - If force_samples == 0: Let nr_samples dictate sample count
827 * - If force_samples == 1: force single-sampled rasterization
828 * - If force_samples >= 1: force multi-sampled rasterization
829 *
830 * This can be used to read SYSTEM_VALUE_SAMPLE_MASK_IN from the
831 * fragment shader, even when performing single-sampled rendering.
832 */
833 if (!fb->force_samples) {
834 cfg.sample_count = fb->nr_samples;
835 cfg.sample_pattern = pan_sample_pattern(fb->nr_samples);
836 } else if (fb->force_samples == 1) {
837 cfg.sample_count = fb->nr_samples;
838 cfg.sample_pattern = pan_sample_pattern(1);
839 } else {
840 cfg.sample_count = 1;
841 cfg.sample_pattern = pan_sample_pattern(fb->force_samples);
842 }
843
844 cfg.z_write_enable = (fb->zs.view.zs && !fb->zs.discard.z);
845 cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s);
846 cfg.has_zs_crc_extension = has_zs_crc_ext;
847
848 if (crc_rt >= 0) {
849 bool *valid = fb->rts[crc_rt].crc_valid;
850 bool full = !fb->extent.minx && !fb->extent.miny &&
851 fb->extent.maxx == (fb->width - 1) &&
852 fb->extent.maxy == (fb->height - 1);
853 bool clean_tile_write = fb->rts[crc_rt].clear;
854
855 #if PAN_ARCH >= 6
856 clean_tile_write |= pan_force_clean_write_on(
857 pan_image_view_get_color_plane(fb->rts[crc_rt].view),
858 fb->tile_size);
859 #endif
860
861 /* If the CRC was valid it stays valid, if it wasn't, we must ensure
862 * the render operation covers the full frame, and clean tiles are
863 * pushed to memory. */
864 bool new_valid = *valid | (full && clean_tile_write);
865
866 cfg.crc_read_enable = *valid;
867
868 /* If the data is currently invalid, still write CRC
869 * data if we are doing a full write, so that it is
870 * valid for next time. */
871 cfg.crc_write_enable = new_valid;
872
873 *valid = new_valid;
874 }
875
876 #if PAN_ARCH >= 9
877 cfg.point_sprite_coord_origin_max_y = fb->sprite_coord_origin;
878 cfg.first_provoking_vertex = fb->first_provoking_vertex;
879
880 /* internal_layer_index is used to select the right primitive list in the
881 * tiler context, and frame_arg is the value that's passed to the fragment
882 * shader through r62-r63, which we use to pass gl_Layer. Since the
883 * layer_idx only takes 8-bits, we might use the extra 56-bits we have
884 * in frame_argument to pass other information to the fragment shader at
885 * some point. */
886 assert(layer_idx >= tiler_ctx->valhall.layer_offset);
887 cfg.internal_layer_index = layer_idx - tiler_ctx->valhall.layer_offset;
888 cfg.frame_argument = layer_idx;
889 #endif
890 }
891
892 #if PAN_ARCH >= 6
893 pan_section_pack(fbd, FRAMEBUFFER, PADDING, padding)
894 ;
895 #else
896 pan_emit_midgard_tiler(fb, tiler_ctx,
897 pan_section_ptr(fbd, FRAMEBUFFER, TILER));
898
899 /* All weights set to 0, nothing to do here */
900 pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w)
901 ;
902 #endif
903
904 if (has_zs_crc_ext) {
905 struct mali_zs_crc_extension_packed *zs_crc_ext =
906 out + pan_size(FRAMEBUFFER);
907
908 pan_emit_zs_crc_ext(fb, layer_idx, crc_rt, zs_crc_ext);
909 rtd += pan_size(ZS_CRC_EXTENSION);
910 }
911
912 unsigned rt_count = MAX2(fb->rt_count, 1);
913 unsigned cbuf_offset = 0;
914 for (unsigned i = 0; i < rt_count; i++) {
915 pan_emit_rt(fb, layer_idx, i, cbuf_offset, rtd);
916 rtd += pan_size(RENDER_TARGET);
917 if (!fb->rts[i].view)
918 continue;
919
920 cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) *
921 fb->tile_size * pan_image_view_get_nr_samples(fb->rts[i].view);
922
923 if (i != crc_rt)
924 *(fb->rts[i].crc_valid) = false;
925 }
926
927 struct mali_framebuffer_pointer_packed tag;
928 pan_pack(&tag, FRAMEBUFFER_POINTER, cfg) {
929 cfg.zs_crc_extension_present = has_zs_crc_ext;
930 cfg.render_target_count = MAX2(fb->rt_count, 1);
931 }
932 return tag.opaque[0];
933 }
934 #else /* PAN_ARCH == 4 */
935 static enum mali_color_format
pan_sfbd_raw_format(unsigned bits)936 pan_sfbd_raw_format(unsigned bits)
937 {
938 /* clang-format off */
939 switch (bits) {
940 case 16: return MALI_COLOR_FORMAT_1_16B_CHANNEL;
941 case 32: return MALI_COLOR_FORMAT_1_32B_CHANNEL;
942 case 48: return MALI_COLOR_FORMAT_3_16B_CHANNELS;
943 case 64: return MALI_COLOR_FORMAT_2_32B_CHANNELS;
944 case 96: return MALI_COLOR_FORMAT_3_32B_CHANNELS;
945 case 128: return MALI_COLOR_FORMAT_4_32B_CHANNELS;
946 default: unreachable("invalid raw bpp");
947 }
948 /* clang-format on */
949 }
950
951 void
GENX(pan_select_tile_size)952 GENX(pan_select_tile_size)(struct pan_fb_info *fb)
953 {
954 /* Tile size and color buffer allocation are not configurable on gen 4 */
955 fb->tile_size = 16 * 16;
956 }
957
958 unsigned
GENX(pan_emit_fbd)959 GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
960 const struct pan_tls_info *tls,
961 const struct pan_tiler_context *tiler_ctx, void *fbd)
962 {
963 assert(fb->rt_count <= 1);
964
965 GENX(pan_emit_tls)(tls, pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE));
966 pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
967 cfg.bound_max_x = fb->width - 1;
968 cfg.bound_max_y = fb->height - 1;
969 cfg.dithering_enable = true;
970 cfg.clean_pixel_write_enable = true;
971 cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
972 if (fb->rts[0].clear) {
973 cfg.clear_color_0 = fb->rts[0].clear_value[0];
974 cfg.clear_color_1 = fb->rts[0].clear_value[1];
975 cfg.clear_color_2 = fb->rts[0].clear_value[2];
976 cfg.clear_color_3 = fb->rts[0].clear_value[3];
977 }
978
979 if (fb->zs.clear.z)
980 cfg.z_clear = fb->zs.clear_value.depth;
981
982 if (fb->zs.clear.s)
983 cfg.s_clear = fb->zs.clear_value.stencil;
984
985 if (fb->rt_count && fb->rts[0].view) {
986 const struct pan_image_view *rt = fb->rts[0].view;
987 const struct pan_image *image = pan_image_view_get_color_plane(rt);
988
989 const struct util_format_description *desc =
990 util_format_description(rt->format);
991
992 /* The swizzle for rendering is inverted from texturing */
993 unsigned char swizzle[4];
994 panfrost_invert_swizzle(desc->swizzle, swizzle);
995 cfg.swizzle = panfrost_translate_swizzle_4(swizzle);
996
997 struct pan_blendable_format fmt =
998 *GENX(panfrost_blendable_format_from_pipe_format)(rt->format);
999
1000 if (fmt.internal) {
1001 cfg.internal_format = fmt.internal;
1002 cfg.color_writeback_format = fmt.writeback;
1003 } else {
1004 /* Construct RAW internal/writeback */
1005 unsigned bits = desc->block.bits;
1006
1007 cfg.internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW_VALUE;
1008 cfg.color_writeback_format = pan_sfbd_raw_format(bits);
1009 }
1010
1011 unsigned level = rt->first_level;
1012 struct pan_surface surf;
1013
1014 pan_iview_get_surface(rt, 0, 0, 0, &surf);
1015
1016 cfg.color_write_enable = !fb->rts[0].discard;
1017 cfg.color_writeback.base = surf.data;
1018 cfg.color_writeback.row_stride =
1019 image->layout.slices[level].row_stride;
1020
1021 cfg.color_block_format = mod_to_block_fmt(image->layout.modifier);
1022 assert(cfg.color_block_format == MALI_BLOCK_FORMAT_LINEAR ||
1023 cfg.color_block_format ==
1024 MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED);
1025
1026 if (pan_image_view_has_crc(rt)) {
1027 const struct pan_image_slice_layout *slice =
1028 &image->layout.slices[level];
1029
1030 cfg.crc_buffer.row_stride = slice->crc.stride;
1031 cfg.crc_buffer.base =
1032 image->data.base + image->data.offset + slice->crc.offset;
1033 }
1034 }
1035
1036 if (fb->zs.view.zs) {
1037 const struct pan_image_view *zs = fb->zs.view.zs;
1038 const struct pan_image *image = pan_image_view_get_zs_plane(zs);
1039 unsigned level = zs->first_level;
1040 struct pan_surface surf;
1041
1042 pan_iview_get_surface(zs, 0, 0, 0, &surf);
1043
1044 cfg.zs_write_enable = !fb->zs.discard.z;
1045 cfg.zs_writeback.base = surf.data;
1046 cfg.zs_writeback.row_stride = image->layout.slices[level].row_stride;
1047 cfg.zs_block_format = mod_to_block_fmt(image->layout.modifier);
1048 assert(cfg.zs_block_format == MALI_BLOCK_FORMAT_LINEAR ||
1049 cfg.zs_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED);
1050
1051 cfg.zs_format = translate_zs_format(zs->format);
1052 }
1053
1054 cfg.sample_count = fb->nr_samples;
1055
1056 if (fb->rt_count)
1057 cfg.msaa = mali_sampling_mode(fb->rts[0].view);
1058 }
1059
1060 pan_emit_midgard_tiler(fb, tiler_ctx,
1061 pan_section_ptr(fbd, FRAMEBUFFER, TILER));
1062
1063 /* All weights set to 0, nothing to do here */
1064 pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w)
1065 ;
1066
1067 pan_section_pack(fbd, FRAMEBUFFER, PADDING_1, padding)
1068 ;
1069 pan_section_pack(fbd, FRAMEBUFFER, PADDING_2, padding)
1070 ;
1071 return 0;
1072 }
1073 #endif
1074
1075 #if PAN_ARCH <= 9
1076 void
GENX(pan_emit_fragment_job_payload)1077 GENX(pan_emit_fragment_job_payload)(const struct pan_fb_info *fb, uint64_t fbd,
1078 void *out)
1079 {
1080 pan_section_pack(out, FRAGMENT_JOB, PAYLOAD, payload) {
1081 payload.bound_min_x = fb->extent.minx >> MALI_TILE_SHIFT;
1082 payload.bound_min_y = fb->extent.miny >> MALI_TILE_SHIFT;
1083 payload.bound_max_x = fb->extent.maxx >> MALI_TILE_SHIFT;
1084 payload.bound_max_y = fb->extent.maxy >> MALI_TILE_SHIFT;
1085 payload.framebuffer = fbd;
1086
1087 #if PAN_ARCH >= 5
1088 if (fb->tile_map.base) {
1089 payload.has_tile_enable_map = true;
1090 payload.tile_enable_map = fb->tile_map.base;
1091 payload.tile_enable_map_row_stride = fb->tile_map.stride;
1092 }
1093 #endif
1094 }
1095 }
1096 #endif
1097