1 /*
2 * Copyright (C) 2021 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 * Boris Brezillon <boris.brezillon@collabora.com>
26 */
27
28 #include "util/macros.h"
29
30 #include "panfrost-quirks.h"
31
32 #include "pan_cs.h"
33 #include "pan_encoder.h"
34 #include "pan_texture.h"
35
36 static unsigned
mod_to_block_fmt(uint64_t mod)37 mod_to_block_fmt(uint64_t mod)
38 {
39 switch (mod) {
40 case DRM_FORMAT_MOD_LINEAR:
41 return MALI_BLOCK_FORMAT_LINEAR;
42 case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED:
43 return MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
44 default:
45 #if PAN_ARCH >= 5
46 if (drm_is_afbc(mod))
47 return MALI_BLOCK_FORMAT_AFBC;
48 #endif
49
50 unreachable("Unsupported modifer");
51 }
52 }
53
54 static enum mali_msaa
mali_sampling_mode(const struct pan_image_view * view)55 mali_sampling_mode(const struct pan_image_view *view)
56 {
57 if (view->image->layout.nr_samples > 1) {
58 assert(view->nr_samples == view->image->layout.nr_samples);
59 assert(view->image->layout.slices[0].surface_stride != 0);
60 return MALI_MSAA_LAYERED;
61 }
62
63 if (view->nr_samples > view->image->layout.nr_samples) {
64 assert(view->image->layout.nr_samples == 1);
65 return MALI_MSAA_AVERAGE;
66 }
67
68 assert(view->nr_samples == view->image->layout.nr_samples);
69 assert(view->nr_samples == 1);
70
71 return MALI_MSAA_SINGLE;
72 }
73
74 static inline enum mali_sample_pattern
pan_sample_pattern(unsigned samples)75 pan_sample_pattern(unsigned samples)
76 {
77 switch (samples) {
78 case 1: return MALI_SAMPLE_PATTERN_SINGLE_SAMPLED;
79 case 4: return MALI_SAMPLE_PATTERN_ROTATED_4X_GRID;
80 case 8: return MALI_SAMPLE_PATTERN_D3D_8X_GRID;
81 case 16: return MALI_SAMPLE_PATTERN_D3D_16X_GRID;
82 default: unreachable("Unsupported sample count");
83 }
84 }
85
86 int
GENX(pan_select_crc_rt)87 GENX(pan_select_crc_rt)(const struct pan_fb_info *fb)
88 {
89 #if PAN_ARCH <= 6
90 if (fb->rt_count == 1 && fb->rts[0].view && !fb->rts[0].discard &&
91 fb->rts[0].view->image->layout.crc_mode != PAN_IMAGE_CRC_NONE)
92 return 0;
93
94 return -1;
95 #else
96 bool best_rt_valid = false;
97 int best_rt = -1;
98
99 for (unsigned i = 0; i < fb->rt_count; i++) {
100 if (!fb->rts[i].view || fb->rts[0].discard ||
101 fb->rts[i].view->image->layout.crc_mode == PAN_IMAGE_CRC_NONE)
102 continue;
103
104 bool valid = *(fb->rts[i].crc_valid);
105 bool full = !fb->extent.minx && !fb->extent.miny &&
106 fb->extent.maxx == (fb->width - 1) &&
107 fb->extent.maxy == (fb->height - 1);
108 if (!full && !valid)
109 continue;
110
111 if (best_rt < 0 || (valid && !best_rt_valid)) {
112 best_rt = i;
113 best_rt_valid = valid;
114 }
115
116 if (valid)
117 break;
118 }
119
120 return best_rt;
121 #endif
122 }
123
124 static enum mali_zs_format
translate_zs_format(enum pipe_format in)125 translate_zs_format(enum pipe_format in)
126 {
127 switch (in) {
128 case PIPE_FORMAT_Z16_UNORM: return MALI_ZS_FORMAT_D16;
129 case PIPE_FORMAT_Z24_UNORM_S8_UINT: return MALI_ZS_FORMAT_D24S8;
130 case PIPE_FORMAT_Z24X8_UNORM: return MALI_ZS_FORMAT_D24X8;
131 case PIPE_FORMAT_Z32_FLOAT: return MALI_ZS_FORMAT_D32;
132 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: return MALI_ZS_FORMAT_D32_S8X24;
133 default: unreachable("Unsupported depth/stencil format.");
134 }
135 }
136
137 #if PAN_ARCH >= 5
138 static enum mali_s_format
translate_s_format(enum pipe_format in)139 translate_s_format(enum pipe_format in)
140 {
141 switch (in) {
142 case PIPE_FORMAT_S8_UINT: return MALI_S_FORMAT_S8;
143 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
144 case PIPE_FORMAT_S8X24_UINT:
145 return MALI_S_FORMAT_S8X24;
146 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
147 case PIPE_FORMAT_X24S8_UINT:
148 return MALI_S_FORMAT_X24S8;
149 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
150 return MALI_S_FORMAT_X32_S8X24;
151 default:
152 unreachable("Unsupported stencil format.");
153 }
154 }
155
156 static void
pan_prepare_s(const struct pan_fb_info * fb,struct MALI_ZS_CRC_EXTENSION * ext)157 pan_prepare_s(const struct pan_fb_info *fb,
158 struct MALI_ZS_CRC_EXTENSION *ext)
159 {
160 const struct pan_image_view *s = fb->zs.view.s;
161
162 if (!s)
163 return;
164
165 unsigned level = s->first_level;
166
167 ext->s_msaa = mali_sampling_mode(s);
168
169 struct pan_surface surf;
170 pan_iview_get_surface(s, 0, 0, 0, &surf);
171
172 assert(s->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
173 s->image->layout.modifier == DRM_FORMAT_MOD_LINEAR);
174 ext->s_writeback_base = surf.data;
175 ext->s_writeback_row_stride = s->image->layout.slices[level].row_stride;
176 ext->s_writeback_surface_stride =
177 (s->image->layout.nr_samples > 1) ?
178 s->image->layout.slices[level].surface_stride : 0;
179 ext->s_block_format = mod_to_block_fmt(s->image->layout.modifier);
180 ext->s_write_format = translate_s_format(s->format);
181 }
182
183 static void
pan_prepare_zs(const struct pan_fb_info * fb,struct MALI_ZS_CRC_EXTENSION * ext)184 pan_prepare_zs(const struct pan_fb_info *fb,
185 struct MALI_ZS_CRC_EXTENSION *ext)
186 {
187 const struct pan_image_view *zs = fb->zs.view.zs;
188
189 if (!zs)
190 return;
191
192 unsigned level = zs->first_level;
193
194 ext->zs_msaa = mali_sampling_mode(zs);
195
196 struct pan_surface surf;
197 pan_iview_get_surface(zs, 0, 0, 0, &surf);
198
199 if (drm_is_afbc(zs->image->layout.modifier)) {
200 #if PAN_ARCH >= 6
201 const struct pan_image_slice_layout *slice = &zs->image->layout.slices[level];
202
203 ext->zs_afbc_row_stride = slice->afbc.row_stride /
204 AFBC_HEADER_BYTES_PER_TILE;
205 #else
206 ext->zs_block_format = MALI_BLOCK_FORMAT_AFBC;
207 ext->zs_afbc_body_size = 0x1000;
208 ext->zs_afbc_chunk_size = 9;
209 ext->zs_afbc_sparse = true;
210 #endif
211
212 ext->zs_afbc_header = surf.afbc.header;
213 ext->zs_afbc_body = surf.afbc.body;
214 } else {
215 assert(zs->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
216 zs->image->layout.modifier == DRM_FORMAT_MOD_LINEAR);
217
218 /* TODO: Z32F(S8) support, which is always linear */
219
220 ext->zs_writeback_base = surf.data;
221 ext->zs_writeback_row_stride =
222 zs->image->layout.slices[level].row_stride;
223 ext->zs_writeback_surface_stride =
224 (zs->image->layout.nr_samples > 1) ?
225 zs->image->layout.slices[level].surface_stride : 0;
226 }
227
228 ext->zs_block_format = mod_to_block_fmt(zs->image->layout.modifier);
229 ext->zs_write_format = translate_zs_format(zs->format);
230 if (ext->zs_write_format == MALI_ZS_FORMAT_D24S8)
231 ext->s_writeback_base = ext->zs_writeback_base;
232 }
233
234 static void
pan_prepare_crc(const struct pan_fb_info * fb,int rt_crc,struct MALI_ZS_CRC_EXTENSION * ext)235 pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc,
236 struct MALI_ZS_CRC_EXTENSION *ext)
237 {
238 if (rt_crc < 0)
239 return;
240
241 assert(rt_crc < fb->rt_count);
242
243 const struct pan_image_view *rt = fb->rts[rt_crc].view;
244 const struct pan_image_slice_layout *slice = &rt->image->layout.slices[rt->first_level];
245 ext->crc_base = (rt->image->layout.crc_mode == PAN_IMAGE_CRC_INBAND ?
246 (rt->image->data.bo->ptr.gpu + rt->image->data.offset) :
247 (rt->image->crc.bo->ptr.gpu + rt->image->crc.offset)) +
248 slice->crc.offset;
249 ext->crc_row_stride = slice->crc.stride;
250
251 #if PAN_ARCH >= 7
252 ext->crc_render_target = rt_crc;
253
254 if (fb->rts[rt_crc].clear) {
255 uint32_t clear_val = fb->rts[rt_crc].clear_value[0];
256 ext->crc_clear_color = clear_val | 0xc000000000000000 |
257 (((uint64_t)clear_val & 0xffff) << 32);
258 }
259 #endif
260 }
261
262 static void
pan_emit_zs_crc_ext(const struct pan_fb_info * fb,int rt_crc,void * zs_crc_ext)263 pan_emit_zs_crc_ext(const struct pan_fb_info *fb, int rt_crc,
264 void *zs_crc_ext)
265 {
266 pan_pack(zs_crc_ext, ZS_CRC_EXTENSION, cfg) {
267 pan_prepare_crc(fb, rt_crc, &cfg);
268 cfg.zs_clean_pixel_write_enable = fb->zs.clear.z || fb->zs.clear.s;
269 pan_prepare_zs(fb, &cfg);
270 pan_prepare_s(fb, &cfg);
271 }
272 }
273
274 /* Measure format as it appears in the tile buffer */
275
276 static unsigned
pan_bytes_per_pixel_tib(enum pipe_format format)277 pan_bytes_per_pixel_tib(enum pipe_format format)
278 {
279 if (panfrost_blendable_formats_v7[format].internal) {
280 /* Blendable formats are always 32-bits in the tile buffer,
281 * extra bits are used as padding or to dither */
282 return 4;
283 } else {
284 /* Non-blendable formats are raw, rounded up to the nearest
285 * power-of-two size */
286 unsigned bytes = util_format_get_blocksize(format);
287 return util_next_power_of_two(bytes);
288 }
289 }
290
291 static unsigned
pan_internal_cbuf_size(const struct pan_fb_info * fb,unsigned * tile_size)292 pan_internal_cbuf_size(const struct pan_fb_info *fb,
293 unsigned *tile_size)
294 {
295 unsigned total_size = 0;
296
297 *tile_size = 16 * 16;
298 for (int cb = 0; cb < fb->rt_count; ++cb) {
299 const struct pan_image_view *rt = fb->rts[cb].view;
300
301 if (!rt)
302 continue;
303
304 total_size += pan_bytes_per_pixel_tib(rt->format) *
305 rt->nr_samples * (*tile_size);
306 }
307
308 /* We have a 4KB budget, let's reduce the tile size until it fits. */
309 while (total_size > 4096) {
310 total_size >>= 1;
311 *tile_size >>= 1;
312 }
313
314 /* Align on 1k. */
315 total_size = ALIGN_POT(total_size, 1024);
316
317 /* Minimum tile size is 4x4. */
318 assert(*tile_size >= 4 * 4);
319 return total_size;
320 }
321
322 static enum mali_color_format
pan_mfbd_raw_format(unsigned bits)323 pan_mfbd_raw_format(unsigned bits)
324 {
325 switch (bits) {
326 case 8: return MALI_COLOR_FORMAT_RAW8;
327 case 16: return MALI_COLOR_FORMAT_RAW16;
328 case 24: return MALI_COLOR_FORMAT_RAW24;
329 case 32: return MALI_COLOR_FORMAT_RAW32;
330 case 48: return MALI_COLOR_FORMAT_RAW48;
331 case 64: return MALI_COLOR_FORMAT_RAW64;
332 case 96: return MALI_COLOR_FORMAT_RAW96;
333 case 128: return MALI_COLOR_FORMAT_RAW128;
334 case 192: return MALI_COLOR_FORMAT_RAW192;
335 case 256: return MALI_COLOR_FORMAT_RAW256;
336 case 384: return MALI_COLOR_FORMAT_RAW384;
337 case 512: return MALI_COLOR_FORMAT_RAW512;
338 case 768: return MALI_COLOR_FORMAT_RAW768;
339 case 1024: return MALI_COLOR_FORMAT_RAW1024;
340 case 1536: return MALI_COLOR_FORMAT_RAW1536;
341 case 2048: return MALI_COLOR_FORMAT_RAW2048;
342 default: unreachable("invalid raw bpp");
343 }
344 }
345
346 static void
pan_rt_init_format(const struct pan_image_view * rt,struct MALI_RENDER_TARGET * cfg)347 pan_rt_init_format(const struct pan_image_view *rt,
348 struct MALI_RENDER_TARGET *cfg)
349 {
350 /* Explode details on the format */
351
352 const struct util_format_description *desc =
353 util_format_description(rt->format);
354
355 /* The swizzle for rendering is inverted from texturing */
356
357 unsigned char swizzle[4] = {
358 PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
359 };
360
361 /* Fill in accordingly, defaulting to 8-bit UNORM */
362
363 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
364 cfg->srgb = true;
365
366 struct pan_blendable_format fmt = panfrost_blendable_formats_v7[rt->format];
367
368 if (fmt.internal) {
369 cfg->internal_format = fmt.internal;
370 cfg->writeback_format = fmt.writeback;
371 panfrost_invert_swizzle(desc->swizzle, swizzle);
372 } else {
373 /* Construct RAW internal/writeback, where internal is
374 * specified logarithmically (round to next power-of-two).
375 * Offset specified from RAW8, where 8 = 2^3 */
376
377 unsigned bits = desc->block.bits;
378 unsigned offset = util_logbase2_ceil(bits) - 3;
379 assert(offset <= 4);
380
381 cfg->internal_format =
382 MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW8 + offset;
383
384 cfg->writeback_format = pan_mfbd_raw_format(bits);
385 }
386
387 cfg->swizzle = panfrost_translate_swizzle_4(swizzle);
388 }
389
390 static void
pan_prepare_rt(const struct pan_fb_info * fb,unsigned idx,unsigned cbuf_offset,struct MALI_RENDER_TARGET * cfg)391 pan_prepare_rt(const struct pan_fb_info *fb, unsigned idx,
392 unsigned cbuf_offset,
393 struct MALI_RENDER_TARGET *cfg)
394 {
395 cfg->clean_pixel_write_enable = fb->rts[idx].clear;
396 cfg->internal_buffer_offset = cbuf_offset;
397 if (fb->rts[idx].clear) {
398 cfg->clear.color_0 = fb->rts[idx].clear_value[0];
399 cfg->clear.color_1 = fb->rts[idx].clear_value[1];
400 cfg->clear.color_2 = fb->rts[idx].clear_value[2];
401 cfg->clear.color_3 = fb->rts[idx].clear_value[3];
402 }
403
404 const struct pan_image_view *rt = fb->rts[idx].view;
405 if (!rt || fb->rts[idx].discard) {
406 cfg->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8;
407 cfg->internal_buffer_offset = cbuf_offset;
408 #if PAN_ARCH >= 7
409 cfg->writeback_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
410 cfg->dithering_enable = true;
411 #endif
412 return;
413 }
414
415 cfg->write_enable = true;
416 cfg->dithering_enable = true;
417
418 unsigned level = rt->first_level;
419 assert(rt->last_level == rt->first_level);
420 assert(rt->last_layer == rt->first_layer);
421
422 int row_stride = rt->image->layout.slices[level].row_stride;
423
424 /* Only set layer_stride for layered MSAA rendering */
425
426 unsigned layer_stride =
427 (rt->image->layout.nr_samples > 1) ?
428 rt->image->layout.slices[level].surface_stride : 0;
429
430 cfg->writeback_msaa = mali_sampling_mode(rt);
431
432 pan_rt_init_format(rt, cfg);
433
434 #if PAN_ARCH <= 5
435 cfg->writeback_block_format = mod_to_block_fmt(rt->image->layout.modifier);
436 #else
437 cfg->writeback_block_format = mod_to_block_fmt(rt->image->layout.modifier);
438 #endif
439
440 struct pan_surface surf;
441 pan_iview_get_surface(rt, 0, 0, 0, &surf);
442
443 if (drm_is_afbc(rt->image->layout.modifier)) {
444 const struct pan_image_slice_layout *slice = &rt->image->layout.slices[level];
445
446 #if PAN_ARCH >= 6
447 cfg->afbc.row_stride = slice->afbc.row_stride /
448 AFBC_HEADER_BYTES_PER_TILE;
449 cfg->afbc.afbc_wide_block_enable =
450 panfrost_block_dim(rt->image->layout.modifier, true, 0) > 16;
451 #else
452 cfg->afbc.chunk_size = 9;
453 cfg->afbc.sparse = true;
454 cfg->afbc.body_size = slice->afbc.body_size;
455 #endif
456
457 cfg->afbc.header = surf.afbc.header;
458 cfg->afbc.body = surf.afbc.body;
459
460 if (rt->image->layout.modifier & AFBC_FORMAT_MOD_YTR)
461 cfg->afbc.yuv_transform_enable = true;
462 } else {
463 assert(rt->image->layout.modifier == DRM_FORMAT_MOD_LINEAR ||
464 rt->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED);
465 cfg->rgb.base = surf.data;
466 cfg->rgb.row_stride = row_stride;
467 cfg->rgb.surface_stride = layer_stride;
468 }
469 }
470 #endif
471
472 void
GENX(pan_emit_tls)473 GENX(pan_emit_tls)(const struct pan_tls_info *info,
474 void *out)
475 {
476 pan_pack(out, LOCAL_STORAGE, cfg) {
477 if (info->tls.size) {
478 unsigned shift =
479 panfrost_get_stack_shift(info->tls.size);
480
481 cfg.tls_size = shift;
482 cfg.tls_base_pointer = info->tls.ptr;
483 }
484
485 if (info->wls.size) {
486 assert(!(info->wls.ptr & 4095));
487 assert((info->wls.ptr & 0xffffffff00000000ULL) == ((info->wls.ptr + info->wls.size - 1) & 0xffffffff00000000ULL));
488 cfg.wls_base_pointer = info->wls.ptr;
489 unsigned wls_size = pan_wls_adjust_size(info->wls.size);
490 cfg.wls_instances = pan_wls_instances(&info->wls.dim);
491 cfg.wls_size_scale = util_logbase2(wls_size) + 1;
492 } else {
493 cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
494 }
495 }
496 }
497
498 #if PAN_ARCH <= 5
499 static void
pan_emit_midgard_tiler(const struct panfrost_device * dev,const struct pan_fb_info * fb,const struct pan_tiler_context * tiler_ctx,void * out)500 pan_emit_midgard_tiler(const struct panfrost_device *dev,
501 const struct pan_fb_info *fb,
502 const struct pan_tiler_context *tiler_ctx,
503 void *out)
504 {
505 bool hierarchy = !(dev->quirks & MIDGARD_NO_HIER_TILING);
506
507 assert(tiler_ctx->midgard.polygon_list->ptr.gpu);
508
509 pan_pack(out, TILER_CONTEXT, cfg) {
510 unsigned header_size;
511
512 if (tiler_ctx->midgard.disable) {
513 cfg.hierarchy_mask =
514 hierarchy ?
515 MALI_MIDGARD_TILER_DISABLED :
516 MALI_MIDGARD_TILER_USER;
517 header_size = MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE;
518 cfg.polygon_list_size = header_size + (hierarchy ? 0 : 4);
519 cfg.heap_start = tiler_ctx->midgard.polygon_list->ptr.gpu;
520 cfg.heap_end = tiler_ctx->midgard.polygon_list->ptr.gpu;
521 } else {
522 cfg.hierarchy_mask =
523 panfrost_choose_hierarchy_mask(fb->width,
524 fb->height,
525 1, hierarchy);
526 header_size = panfrost_tiler_header_size(fb->width,
527 fb->height,
528 cfg.hierarchy_mask,
529 hierarchy);
530 cfg.polygon_list_size =
531 panfrost_tiler_full_size(fb->width, fb->height,
532 cfg.hierarchy_mask,
533 hierarchy);
534 cfg.heap_start = dev->tiler_heap->ptr.gpu;
535 cfg.heap_end = dev->tiler_heap->ptr.gpu + dev->tiler_heap->size;
536 }
537
538 cfg.polygon_list = tiler_ctx->midgard.polygon_list->ptr.gpu;
539 cfg.polygon_list_body = cfg.polygon_list + header_size;
540 }
541 }
542 #endif
543
544 #if PAN_ARCH >= 5
545 static void
pan_emit_rt(const struct pan_fb_info * fb,unsigned idx,unsigned cbuf_offset,void * out)546 pan_emit_rt(const struct pan_fb_info *fb,
547 unsigned idx, unsigned cbuf_offset, void *out)
548 {
549 pan_pack(out, RENDER_TARGET, cfg) {
550 pan_prepare_rt(fb, idx, cbuf_offset, &cfg);
551 }
552 }
553
554 #if PAN_ARCH >= 6
555 /* All Bifrost and Valhall GPUs are affected by issue TSIX-2033:
556 *
557 * Forcing clean_tile_writes breaks INTERSECT readbacks
558 *
559 * To workaround, use the frame shader mode ALWAYS instead of INTERSECT if
560 * clean tile writes is forced. Since INTERSECT is a hint that the hardware may
561 * ignore, this cannot affect correctness, only performance */
562
563 static enum mali_pre_post_frame_shader_mode
pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode,bool force_clean_tile)564 pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode, bool force_clean_tile)
565 {
566 if (force_clean_tile && mode == MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT)
567 return MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS;
568 else
569 return mode;
570 }
571
572 /* Regardless of clean_tile_write_enable, the hardware writes clean tiles if
573 * the effective tile size differs from the superblock size of any enabled AFBC
574 * render target. Check this condition. */
575
576 static bool
pan_force_clean_write_rt(const struct pan_image_view * rt,unsigned tile_size)577 pan_force_clean_write_rt(const struct pan_image_view *rt, unsigned tile_size)
578 {
579 if (!drm_is_afbc(rt->image->layout.modifier))
580 return false;
581
582 unsigned superblock = panfrost_block_dim(rt->image->layout.modifier, true, 0);
583
584 assert(superblock >= 16);
585 assert(tile_size <= 16*16);
586
587 /* Tile size and superblock differ unless they are both 16x16 */
588 return !(superblock == 16 && tile_size == 16*16);
589 }
590
591 static bool
pan_force_clean_write(const struct pan_fb_info * fb,unsigned tile_size)592 pan_force_clean_write(const struct pan_fb_info *fb, unsigned tile_size)
593 {
594 /* Maximum tile size */
595 assert(tile_size <= 16*16);
596
597 for (unsigned i = 0; i < fb->rt_count; ++i) {
598 if (fb->rts[i].view && !fb->rts[i].discard &&
599 pan_force_clean_write_rt(fb->rts[i].view, tile_size))
600 return true;
601 }
602
603 if (fb->zs.view.zs && !fb->zs.discard.z &&
604 pan_force_clean_write_rt(fb->zs.view.zs, tile_size))
605 return true;
606
607 if (fb->zs.view.s && !fb->zs.discard.s &&
608 pan_force_clean_write_rt(fb->zs.view.s, tile_size))
609 return true;
610
611 return false;
612 }
613
614 #endif
615
616 static unsigned
pan_emit_mfbd(const struct panfrost_device * dev,const struct pan_fb_info * fb,const struct pan_tls_info * tls,const struct pan_tiler_context * tiler_ctx,void * out)617 pan_emit_mfbd(const struct panfrost_device *dev,
618 const struct pan_fb_info *fb,
619 const struct pan_tls_info *tls,
620 const struct pan_tiler_context *tiler_ctx,
621 void *out)
622 {
623 unsigned tags = MALI_FBD_TAG_IS_MFBD;
624 void *fbd = out;
625 void *rtd = out + pan_size(FRAMEBUFFER);
626
627 #if PAN_ARCH <= 5
628 GENX(pan_emit_tls)(tls,
629 pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE));
630 #endif
631
632 unsigned tile_size;
633 unsigned internal_cbuf_size = pan_internal_cbuf_size(fb, &tile_size);
634 int crc_rt = GENX(pan_select_crc_rt)(fb);
635 bool has_zs_crc_ext = pan_fbd_has_zs_crc_ext(fb);
636
637 pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
638 #if PAN_ARCH >= 6
639 bool force_clean_write = pan_force_clean_write(fb, tile_size);
640
641 cfg.sample_locations =
642 panfrost_sample_positions(dev, pan_sample_pattern(fb->nr_samples));
643 cfg.pre_frame_0 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[0], force_clean_write);
644 cfg.pre_frame_1 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[1], force_clean_write);
645 cfg.post_frame = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[2], force_clean_write);
646 cfg.frame_shader_dcds = fb->bifrost.pre_post.dcds.gpu;
647 cfg.tiler = tiler_ctx->bifrost;
648 #endif
649 cfg.width = fb->width;
650 cfg.height = fb->height;
651 cfg.bound_max_x = fb->width - 1;
652 cfg.bound_max_y = fb->height - 1;
653
654 cfg.effective_tile_size = tile_size;
655 cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
656 cfg.render_target_count = MAX2(fb->rt_count, 1);
657
658 /* Default to 24 bit depth if there's no surface. */
659 cfg.z_internal_format =
660 fb->zs.view.zs ?
661 panfrost_get_z_internal_format(fb->zs.view.zs->format) :
662 MALI_Z_INTERNAL_FORMAT_D24;
663
664 cfg.z_clear = fb->zs.clear_value.depth;
665 cfg.s_clear = fb->zs.clear_value.stencil;
666 cfg.color_buffer_allocation = internal_cbuf_size;
667 cfg.sample_count = fb->nr_samples;
668 cfg.sample_pattern = pan_sample_pattern(fb->nr_samples);
669 cfg.z_write_enable = (fb->zs.view.zs && !fb->zs.discard.z);
670 cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s);
671 cfg.has_zs_crc_extension = has_zs_crc_ext;
672
673 if (crc_rt >= 0) {
674 bool *valid = fb->rts[crc_rt].crc_valid;
675 bool full = !fb->extent.minx && !fb->extent.miny &&
676 fb->extent.maxx == (fb->width - 1) &&
677 fb->extent.maxy == (fb->height - 1);
678
679 cfg.crc_read_enable = *valid;
680
681 /* If the data is currently invalid, still write CRC
682 * data if we are doing a full write, so that it is
683 * valid for next time. */
684 cfg.crc_write_enable = *valid || full;
685
686 *valid |= full;
687 }
688 }
689
690 #if PAN_ARCH >= 6
691 pan_section_pack(fbd, FRAMEBUFFER, PADDING, padding);
692 #else
693 pan_emit_midgard_tiler(dev, fb, tiler_ctx,
694 pan_section_ptr(fbd, FRAMEBUFFER, TILER));
695
696 /* All weights set to 0, nothing to do here */
697 pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w);
698 #endif
699
700 if (has_zs_crc_ext) {
701 pan_emit_zs_crc_ext(fb, crc_rt,
702 out + pan_size(FRAMEBUFFER));
703 rtd += pan_size(ZS_CRC_EXTENSION);
704 tags |= MALI_FBD_TAG_HAS_ZS_RT;
705 }
706
707 unsigned rt_count = MAX2(fb->rt_count, 1);
708 unsigned cbuf_offset = 0;
709 for (unsigned i = 0; i < rt_count; i++) {
710 pan_emit_rt(fb, i, cbuf_offset, rtd);
711 rtd += pan_size(RENDER_TARGET);
712 if (!fb->rts[i].view)
713 continue;
714
715 cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) *
716 tile_size * fb->rts[i].view->image->layout.nr_samples;
717
718 if (i != crc_rt)
719 *(fb->rts[i].crc_valid) = false;
720 }
721 tags |= MALI_POSITIVE(MAX2(fb->rt_count, 1)) << 2;
722
723 return tags;
724 }
725 #else /* PAN_ARCH == 4 */
726 static void
pan_emit_sfbd_tiler(const struct panfrost_device * dev,const struct pan_fb_info * fb,const struct pan_tiler_context * ctx,void * fbd)727 pan_emit_sfbd_tiler(const struct panfrost_device *dev,
728 const struct pan_fb_info *fb,
729 const struct pan_tiler_context *ctx,
730 void *fbd)
731 {
732 pan_emit_midgard_tiler(dev, fb, ctx,
733 pan_section_ptr(fbd, FRAMEBUFFER, TILER));
734
735 /* All weights set to 0, nothing to do here */
736 pan_section_pack(fbd, FRAMEBUFFER, PADDING_1, padding);
737 pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w);
738 }
739
740 static void
pan_emit_sfbd(const struct panfrost_device * dev,const struct pan_fb_info * fb,const struct pan_tls_info * tls,const struct pan_tiler_context * tiler_ctx,void * fbd)741 pan_emit_sfbd(const struct panfrost_device *dev,
742 const struct pan_fb_info *fb,
743 const struct pan_tls_info *tls,
744 const struct pan_tiler_context *tiler_ctx,
745 void *fbd)
746 {
747 GENX(pan_emit_tls)(tls,
748 pan_section_ptr(fbd, FRAMEBUFFER,
749 LOCAL_STORAGE));
750 pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
751 cfg.bound_max_x = fb->width - 1;
752 cfg.bound_max_y = fb->height - 1;
753 cfg.dithering_enable = true;
754 cfg.clean_pixel_write_enable = true;
755 cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
756 if (fb->rts[0].clear) {
757 cfg.clear_color_0 = fb->rts[0].clear_value[0];
758 cfg.clear_color_1 = fb->rts[0].clear_value[1];
759 cfg.clear_color_2 = fb->rts[0].clear_value[2];
760 cfg.clear_color_3 = fb->rts[0].clear_value[3];
761 }
762
763 if (fb->zs.clear.z)
764 cfg.z_clear = fb->zs.clear_value.depth;
765
766 if (fb->zs.clear.s)
767 cfg.s_clear = fb->zs.clear_value.stencil;
768
769 if (fb->rt_count && fb->rts[0].view) {
770 const struct pan_image_view *rt = fb->rts[0].view;
771
772 const struct util_format_description *desc =
773 util_format_description(rt->format);
774
775 /* The swizzle for rendering is inverted from texturing */
776 unsigned char swizzle[4];
777 panfrost_invert_swizzle(desc->swizzle, swizzle);
778 cfg.swizzle = panfrost_translate_swizzle_4(swizzle);
779
780 struct pan_blendable_format fmt = panfrost_blendable_formats_v7[rt->format];
781 if (fmt.internal) {
782 cfg.internal_format = fmt.internal;
783 cfg.color_writeback_format = fmt.writeback;
784 } else {
785 unreachable("raw formats not finished for SFBD");
786 }
787
788 unsigned level = rt->first_level;
789 struct pan_surface surf;
790
791 pan_iview_get_surface(rt, 0, 0, 0, &surf);
792
793 cfg.color_write_enable = !fb->rts[0].discard;
794 cfg.color_writeback.base = surf.data;
795 cfg.color_writeback.row_stride =
796 rt->image->layout.slices[level].row_stride;
797
798 cfg.color_block_format = mod_to_block_fmt(rt->image->layout.modifier);
799 assert(cfg.color_block_format == MALI_BLOCK_FORMAT_LINEAR ||
800 cfg.color_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED);
801
802 if (rt->image->layout.crc_mode != PAN_IMAGE_CRC_NONE) {
803 const struct pan_image_slice_layout *slice =
804 &rt->image->layout.slices[level];
805
806 cfg.crc_buffer.row_stride = slice->crc.stride;
807 if (rt->image->layout.crc_mode == PAN_IMAGE_CRC_INBAND) {
808 cfg.crc_buffer.base = rt->image->data.bo->ptr.gpu +
809 rt->image->data.offset +
810 slice->crc.offset;
811 } else {
812 cfg.crc_buffer.base = rt->image->crc.bo->ptr.gpu +
813 rt->image->crc.offset +
814 slice->crc.offset;
815 }
816 }
817 }
818
819 if (fb->zs.view.zs) {
820 const struct pan_image_view *zs = fb->zs.view.zs;
821 unsigned level = zs->first_level;
822 struct pan_surface surf;
823
824 pan_iview_get_surface(zs, 0, 0, 0, &surf);
825
826 cfg.zs_write_enable = !fb->zs.discard.z;
827 cfg.zs_writeback.base = surf.data;
828 cfg.zs_writeback.row_stride =
829 zs->image->layout.slices[level].row_stride;
830 cfg.zs_block_format = mod_to_block_fmt(zs->image->layout.modifier);
831 assert(cfg.zs_block_format == MALI_BLOCK_FORMAT_LINEAR ||
832 cfg.zs_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED);
833
834 cfg.zs_format = translate_zs_format(zs->format);
835 }
836
837 cfg.sample_count = fb->nr_samples;
838
839 if (fb->rt_count)
840 cfg.msaa = mali_sampling_mode(fb->rts[0].view);
841 }
842 pan_emit_sfbd_tiler(dev, fb, tiler_ctx, fbd);
843 pan_section_pack(fbd, FRAMEBUFFER, PADDING_2, padding);
844 }
845 #endif
846
847 unsigned
GENX(pan_emit_fbd)848 GENX(pan_emit_fbd)(const struct panfrost_device *dev,
849 const struct pan_fb_info *fb,
850 const struct pan_tls_info *tls,
851 const struct pan_tiler_context *tiler_ctx,
852 void *out)
853 {
854 #if PAN_ARCH == 4
855 assert(fb->rt_count <= 1);
856 pan_emit_sfbd(dev, fb, tls, tiler_ctx, out);
857 return 0;
858 #else
859 return pan_emit_mfbd(dev, fb, tls, tiler_ctx, out);
860 #endif
861 }
862
863 #if PAN_ARCH >= 6
864 void
GENX(pan_emit_tiler_heap)865 GENX(pan_emit_tiler_heap)(const struct panfrost_device *dev,
866 void *out)
867 {
868 pan_pack(out, TILER_HEAP, heap) {
869 heap.size = dev->tiler_heap->size;
870 heap.base = dev->tiler_heap->ptr.gpu;
871 heap.bottom = dev->tiler_heap->ptr.gpu;
872 heap.top = dev->tiler_heap->ptr.gpu + dev->tiler_heap->size;
873 }
874 }
875
876 void
GENX(pan_emit_tiler_ctx)877 GENX(pan_emit_tiler_ctx)(const struct panfrost_device *dev,
878 unsigned fb_width, unsigned fb_height,
879 unsigned nr_samples,
880 mali_ptr heap,
881 void *out)
882 {
883 unsigned max_levels = dev->tiler_features.max_levels;
884 assert(max_levels >= 2);
885
886 pan_pack(out, TILER_CONTEXT, tiler) {
887 /* TODO: Select hierarchy mask more effectively */
888 tiler.hierarchy_mask = (max_levels >= 8) ? 0xFF : 0x28;
889 tiler.fb_width = fb_width;
890 tiler.fb_height = fb_height;
891 tiler.heap = heap;
892 tiler.sample_pattern = pan_sample_pattern(nr_samples);
893 }
894 }
895 #endif
896
897 void
GENX(pan_emit_fragment_job)898 GENX(pan_emit_fragment_job)(const struct pan_fb_info *fb,
899 mali_ptr fbd,
900 void *out)
901 {
902 pan_section_pack(out, FRAGMENT_JOB, HEADER, header) {
903 header.type = MALI_JOB_TYPE_FRAGMENT;
904 header.index = 1;
905 }
906
907 pan_section_pack(out, FRAGMENT_JOB, PAYLOAD, payload) {
908 payload.bound_min_x = fb->extent.minx >> MALI_TILE_SHIFT;
909 payload.bound_min_y = fb->extent.miny >> MALI_TILE_SHIFT;
910 payload.bound_max_x = fb->extent.maxx >> MALI_TILE_SHIFT;
911 payload.bound_max_y = fb->extent.maxy >> MALI_TILE_SHIFT;
912 payload.framebuffer = fbd;
913
914 #if PAN_ARCH >= 5
915 if (fb->tile_map.base) {
916 payload.has_tile_enable_map = true;
917 payload.tile_enable_map = fb->tile_map.base;
918 payload.tile_enable_map_row_stride = fb->tile_map.stride;
919 }
920 #endif
921 }
922 }
923