/*
 * Copyright (C) 2019-2022 Collabora, Ltd.
 * Copyright (C) 2018-2019 Alyssa Rosenzweig
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
25
26 #include "util/log.h"
27 #include "util/macros.h"
28 #include "util/u_math.h"
29 #include "pan_texture.h"
30
31 /*
32 * List of supported modifiers, in descending order of preference. AFBC is
33 * faster than u-interleaved tiling which is faster than linear. Within AFBC,
34 * enabling the YUV-like transform is typically a win where possible.
35 */
36 uint64_t pan_best_modifiers[PAN_MODIFIER_COUNT] = {
37 DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
38 AFBC_FORMAT_MOD_TILED | AFBC_FORMAT_MOD_SC |
39 AFBC_FORMAT_MOD_SPARSE | AFBC_FORMAT_MOD_YTR),
40
41 DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
42 AFBC_FORMAT_MOD_TILED | AFBC_FORMAT_MOD_SC |
43 AFBC_FORMAT_MOD_SPARSE),
44
45 DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
46 AFBC_FORMAT_MOD_SPARSE | AFBC_FORMAT_MOD_YTR),
47
48 DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
49 AFBC_FORMAT_MOD_SPARSE),
50
51 DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED,
52 DRM_FORMAT_MOD_LINEAR};
53
54 /* Table of AFBC superblock sizes */
55 static const struct pan_block_size afbc_superblock_sizes[] = {
56 [AFBC_FORMAT_MOD_BLOCK_SIZE_16x16] = {16, 16},
57 [AFBC_FORMAT_MOD_BLOCK_SIZE_32x8] = {32, 8},
58 [AFBC_FORMAT_MOD_BLOCK_SIZE_64x4] = {64, 4},
59 };
60
61 /*
62 * Given an AFBC modifier, return the superblock size.
63 *
64 * We do not yet have any use cases for multiplanar YCBCr formats with different
65 * superblock sizes on the luma and chroma planes. These formats are unsupported
66 * for now.
67 */
68 struct pan_block_size
panfrost_afbc_superblock_size(uint64_t modifier)69 panfrost_afbc_superblock_size(uint64_t modifier)
70 {
71 unsigned index = (modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK);
72
73 assert(drm_is_afbc(modifier));
74 assert(index < ARRAY_SIZE(afbc_superblock_sizes));
75
76 return afbc_superblock_sizes[index];
77 }
78
79 /*
80 * Given an AFBC modifier, return the width of the superblock.
81 */
82 unsigned
panfrost_afbc_superblock_width(uint64_t modifier)83 panfrost_afbc_superblock_width(uint64_t modifier)
84 {
85 return panfrost_afbc_superblock_size(modifier).width;
86 }
87
88 /*
89 * Given an AFBC modifier, return the height of the superblock.
90 */
91 unsigned
panfrost_afbc_superblock_height(uint64_t modifier)92 panfrost_afbc_superblock_height(uint64_t modifier)
93 {
94 return panfrost_afbc_superblock_size(modifier).height;
95 }
96
/*
 * Tell whether an AFBC modifier uses "wide blocks", i.e. superblocks that are
 * wider than 16 pixels. 16 pixels is the minimum (and default) superblock
 * width.
 */
bool
panfrost_afbc_is_wide(uint64_t modifier)
{
   const unsigned minimum_width = 16;
   const unsigned width = panfrost_afbc_superblock_width(modifier);

   return width > minimum_width;
}
107
108 /*
109 * Given an AFBC modifier, return the subblock size (subdivision of a
110 * superblock). This is always 4x4 for now as we only support one AFBC
111 * superblock layout.
112 */
113 struct pan_block_size
panfrost_afbc_subblock_size(uint64_t modifier)114 panfrost_afbc_subblock_size(uint64_t modifier)
115 {
116 return (struct pan_block_size){4, 4};
117 }
118
119 /*
120 * Given a format, determine the tile size used for u-interleaving. For formats
121 * that are already block compressed, this is 4x4. For all other formats, this
122 * is 16x16, hence the modifier name.
123 */
124 static inline struct pan_block_size
panfrost_u_interleaved_tile_size(enum pipe_format format)125 panfrost_u_interleaved_tile_size(enum pipe_format format)
126 {
127 if (util_format_is_compressed(format))
128 return (struct pan_block_size){4, 4};
129 else
130 return (struct pan_block_size){16, 16};
131 }
132
133 /*
134 * Determine the block size used for interleaving. For u-interleaving, this is
135 * the tile size. For AFBC, this is the superblock size. For linear textures,
136 * this is trivially 1x1.
137 */
138 struct pan_block_size
panfrost_block_size(uint64_t modifier,enum pipe_format format)139 panfrost_block_size(uint64_t modifier, enum pipe_format format)
140 {
141 if (modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED)
142 return panfrost_u_interleaved_tile_size(format);
143 else if (drm_is_afbc(modifier))
144 return panfrost_afbc_superblock_size(modifier);
145 else
146 return (struct pan_block_size){1, 1};
147 }
148
149 /*
150 * Determine the tile size used by AFBC. This tiles superblocks themselves.
151 * Current GPUs support either 8x8 tiling or no tiling (1x1)
152 */
153 static inline unsigned
pan_afbc_tile_size(uint64_t modifier)154 pan_afbc_tile_size(uint64_t modifier)
155 {
156 return (modifier & AFBC_FORMAT_MOD_TILED) ? 8 : 1;
157 }
158
159 /*
160 * Determine the number of bytes between header rows for an AFBC image. For an
161 * image with linear headers, this is simply the number of header blocks
162 * (=superblocks) per row times the numbers of bytes per header block. For an
163 * image with tiled headers, this is multipled by the number of rows of
164 * header blocks are in a tile together.
165 */
166 uint32_t
pan_afbc_row_stride(uint64_t modifier,uint32_t width)167 pan_afbc_row_stride(uint64_t modifier, uint32_t width)
168 {
169 unsigned block_width = panfrost_afbc_superblock_width(modifier);
170
171 return (width / block_width) * pan_afbc_tile_size(modifier) *
172 AFBC_HEADER_BYTES_PER_TILE;
173 }
174
175 /*
176 * Determine the number of header blocks between header rows. This is equal to
177 * the number of bytes between header rows divided by the bytes per blocks of a
178 * header tile. This is also divided by the tile size to give a "line stride" in
179 * blocks, rather than a real row stride. This is required by Bifrost.
180 */
181 uint32_t
pan_afbc_stride_blocks(uint64_t modifier,uint32_t row_stride_bytes)182 pan_afbc_stride_blocks(uint64_t modifier, uint32_t row_stride_bytes)
183 {
184 return row_stride_bytes /
185 (AFBC_HEADER_BYTES_PER_TILE * pan_afbc_tile_size(modifier));
186 }
187
/*
 * Required alignment, in bytes, for the offset of an image slice. For now
 * every slice is aligned on a 64-byte boundary and the modifier is not
 * inspected.
 */
uint32_t
pan_slice_align(uint64_t modifier)
{
   const uint32_t slice_alignment = 64;

   return slice_alignment;
}
196
197 /*
198 * Determine the required alignment for the body offset of an AFBC image. For
199 * now, this depends only on whether tiling is in use. These minimum alignments
200 * are required on all current GPUs.
201 */
202 uint32_t
pan_afbc_body_align(uint64_t modifier)203 pan_afbc_body_align(uint64_t modifier)
204 {
205 return (modifier & AFBC_FORMAT_MOD_TILED) ? 4096 : 64;
206 }
207
208 static inline unsigned
format_minimum_alignment(unsigned arch,enum pipe_format format,bool afbc)209 format_minimum_alignment(unsigned arch, enum pipe_format format, bool afbc)
210 {
211 if (afbc)
212 return 16;
213
214 if (arch < 7)
215 return 64;
216
217 switch (format) {
218 /* For v7+, NV12/NV21/I420 have a looser alignment requirement of 16 bytes */
219 case PIPE_FORMAT_R8_G8B8_420_UNORM:
220 case PIPE_FORMAT_G8_B8R8_420_UNORM:
221 case PIPE_FORMAT_R8_G8_B8_420_UNORM:
222 case PIPE_FORMAT_R8_B8_G8_420_UNORM:
223 return 16;
224 default:
225 return 64;
226 }
227 }
228
229 /* Computes sizes for checksumming, which is 8 bytes per 16x16 tile.
230 * Checksumming is believed to be a CRC variant (CRC64 based on the size?).
231 * This feature is also known as "transaction elimination". */
232
233 #define CHECKSUM_TILE_WIDTH 16
234 #define CHECKSUM_TILE_HEIGHT 16
235 #define CHECKSUM_BYTES_PER_TILE 8
236
237 unsigned
panfrost_compute_checksum_size(struct pan_image_slice_layout * slice,unsigned width,unsigned height)238 panfrost_compute_checksum_size(struct pan_image_slice_layout *slice,
239 unsigned width, unsigned height)
240 {
241 unsigned tile_count_x = DIV_ROUND_UP(width, CHECKSUM_TILE_WIDTH);
242 unsigned tile_count_y = DIV_ROUND_UP(height, CHECKSUM_TILE_HEIGHT);
243
244 slice->crc.stride = tile_count_x * CHECKSUM_BYTES_PER_TILE;
245
246 return slice->crc.stride * tile_count_y;
247 }
248
249 unsigned
panfrost_get_layer_stride(const struct pan_image_layout * layout,unsigned level)250 panfrost_get_layer_stride(const struct pan_image_layout *layout, unsigned level)
251 {
252 if (layout->dim != MALI_TEXTURE_DIMENSION_3D)
253 return layout->array_stride;
254 else if (drm_is_afbc(layout->modifier))
255 return layout->slices[level].afbc.surface_stride;
256 else
257 return layout->slices[level].surface_stride;
258 }
259
260 unsigned
panfrost_get_legacy_stride(const struct pan_image_layout * layout,unsigned level)261 panfrost_get_legacy_stride(const struct pan_image_layout *layout,
262 unsigned level)
263 {
264 unsigned row_stride = layout->slices[level].row_stride;
265 struct pan_block_size block_size =
266 panfrost_block_size(layout->modifier, layout->format);
267
268 if (drm_is_afbc(layout->modifier)) {
269 unsigned width = u_minify(layout->width, level);
270 unsigned alignment =
271 block_size.width * pan_afbc_tile_size(layout->modifier);
272
273 width = ALIGN_POT(width, alignment);
274 return width * util_format_get_blocksize(layout->format);
275 } else {
276 return row_stride / block_size.height;
277 }
278 }
279
280 unsigned
panfrost_from_legacy_stride(unsigned legacy_stride,enum pipe_format format,uint64_t modifier)281 panfrost_from_legacy_stride(unsigned legacy_stride, enum pipe_format format,
282 uint64_t modifier)
283 {
284 struct pan_block_size block_size = panfrost_block_size(modifier, format);
285
286 if (drm_is_afbc(modifier)) {
287 unsigned width = legacy_stride / util_format_get_blocksize(format);
288
289 return pan_afbc_row_stride(modifier, width);
290 } else {
291 return legacy_stride * block_size.height;
292 }
293 }
294
295 /* Computes the offset into a texture at a particular level/face. Add to
296 * the base address of a texture to get the address to that level/face */
297
298 unsigned
panfrost_texture_offset(const struct pan_image_layout * layout,unsigned level,unsigned array_idx,unsigned surface_idx)299 panfrost_texture_offset(const struct pan_image_layout *layout, unsigned level,
300 unsigned array_idx, unsigned surface_idx)
301 {
302 return layout->slices[level].offset + (array_idx * layout->array_stride) +
303 (surface_idx * layout->slices[level].surface_stride);
304 }
305
306 bool
pan_image_layout_init(unsigned arch,struct pan_image_layout * layout,const struct pan_image_explicit_layout * explicit_layout)307 pan_image_layout_init(unsigned arch, struct pan_image_layout *layout,
308 const struct pan_image_explicit_layout *explicit_layout)
309 {
310 /* Explicit stride only work with non-mipmap, non-array, single-sample
311 * 2D image without CRC.
312 */
313 if (explicit_layout &&
314 (layout->depth > 1 || layout->nr_samples > 1 || layout->array_size > 1 ||
315 layout->dim != MALI_TEXTURE_DIMENSION_2D || layout->nr_slices > 1 ||
316 layout->crc))
317 return false;
318
319 bool afbc = drm_is_afbc(layout->modifier);
320 int align_req = format_minimum_alignment(arch, layout->format, afbc);
321
322 /* Mandate alignment */
323 if (explicit_layout) {
324 bool rejected = false;
325
326 int align_mask = align_req - 1;
327
328 if (arch >= 7) {
329 rejected = ((explicit_layout->offset & align_mask) ||
330 (explicit_layout->row_stride & align_mask));
331 } else {
332 rejected = (explicit_layout->offset & align_mask);
333 }
334
335 if (rejected) {
336 mesa_loge(
337 "panfrost: rejecting image due to unsupported offset or stride "
338 "alignment.\n");
339 return false;
340 }
341 }
342
343 unsigned fmt_blocksize = util_format_get_blocksize(layout->format);
344
345 /* MSAA is implemented as a 3D texture with z corresponding to the
346 * sample #, horrifyingly enough */
347
348 assert(layout->depth == 1 || layout->nr_samples == 1);
349
350 bool linear = layout->modifier == DRM_FORMAT_MOD_LINEAR;
351 bool is_3d = layout->dim == MALI_TEXTURE_DIMENSION_3D;
352
353 unsigned offset = explicit_layout ? explicit_layout->offset : 0;
354 struct pan_block_size block_size =
355 panfrost_block_size(layout->modifier, layout->format);
356
357 unsigned width = layout->width;
358 unsigned height = layout->height;
359 unsigned depth = layout->depth;
360
361 unsigned align_w = block_size.width;
362 unsigned align_h = block_size.height;
363
364 /* For tiled AFBC, align to tiles of superblocks (this can be large) */
365 if (afbc) {
366 align_w *= pan_afbc_tile_size(layout->modifier);
367 align_h *= pan_afbc_tile_size(layout->modifier);
368 }
369
370 for (unsigned l = 0; l < layout->nr_slices; ++l) {
371 struct pan_image_slice_layout *slice = &layout->slices[l];
372
373 unsigned effective_width =
374 ALIGN_POT(util_format_get_nblocksx(layout->format, width), align_w);
375 unsigned effective_height =
376 ALIGN_POT(util_format_get_nblocksy(layout->format, height), align_h);
377
378 /* Align levels to cache-line as a performance improvement for
379 * linear/tiled and as a requirement for AFBC */
380
381 offset = ALIGN_POT(offset, pan_slice_align(layout->modifier));
382
383 slice->offset = offset;
384
385 unsigned row_stride = fmt_blocksize * effective_width * block_size.height;
386
387 /* On v7+ row_stride and offset alignment requirement are equal */
388 if (arch >= 7) {
389 row_stride = ALIGN_POT(row_stride, align_req);
390 }
391
392 if (explicit_layout && !afbc) {
393 /* Make sure the explicit stride is valid */
394 if (explicit_layout->row_stride < row_stride) {
395 mesa_loge("panfrost: rejecting image due to invalid row stride.\n");
396 return false;
397 }
398
399 row_stride = explicit_layout->row_stride;
400 } else if (linear) {
401 /* Keep lines alignment on 64 byte for performance */
402 row_stride = ALIGN_POT(row_stride, 64);
403 }
404
405 unsigned slice_one_size =
406 row_stride * (effective_height / block_size.height);
407
408 /* Compute AFBC sizes if necessary */
409 if (afbc) {
410 slice->row_stride =
411 pan_afbc_row_stride(layout->modifier, effective_width);
412 slice->afbc.stride = effective_width / block_size.width;
413 slice->afbc.nr_blocks =
414 slice->afbc.stride * (effective_height / block_size.height);
415 slice->afbc.header_size =
416 ALIGN_POT(slice->row_stride * (effective_height / align_h),
417 pan_afbc_body_align(layout->modifier));
418
419 if (explicit_layout &&
420 explicit_layout->row_stride < slice->row_stride) {
421 mesa_loge("panfrost: rejecting image due to invalid row stride.\n");
422 return false;
423 }
424
425 /* AFBC body size */
426 slice->afbc.body_size = slice_one_size;
427
428 /* 3D AFBC resources have all headers placed at the
429 * beginning instead of having them split per depth
430 * level
431 */
432 if (is_3d) {
433 slice->afbc.surface_stride = slice->afbc.header_size;
434 slice->afbc.header_size *= depth;
435 slice->afbc.body_size *= depth;
436 offset += slice->afbc.header_size;
437 } else {
438 slice_one_size += slice->afbc.header_size;
439 slice->afbc.surface_stride = slice_one_size;
440 }
441 } else {
442 slice->row_stride = row_stride;
443 }
444
445 unsigned slice_full_size = slice_one_size * depth * layout->nr_samples;
446
447 slice->surface_stride = slice_one_size;
448
449 /* Compute AFBC sizes if necessary */
450
451 offset += slice_full_size;
452 slice->size = slice_full_size;
453
454 /* Add a checksum region if necessary */
455 if (layout->crc) {
456 slice->crc.size = panfrost_compute_checksum_size(slice, width, height);
457
458 slice->crc.offset = offset;
459 offset += slice->crc.size;
460 slice->size += slice->crc.size;
461 }
462
463 width = u_minify(width, 1);
464 height = u_minify(height, 1);
465 depth = u_minify(depth, 1);
466 }
467
468 /* Arrays and cubemaps have the entire miptree duplicated */
469 layout->array_stride = ALIGN_POT(offset, 64);
470 if (explicit_layout)
471 layout->data_size = offset;
472 else
473 layout->data_size =
474 ALIGN_POT(layout->array_stride * layout->array_size, 4096);
475
476 return true;
477 }
478
479 void
pan_iview_get_surface(const struct pan_image_view * iview,unsigned level,unsigned layer,unsigned sample,struct pan_surface * surf)480 pan_iview_get_surface(const struct pan_image_view *iview, unsigned level,
481 unsigned layer, unsigned sample, struct pan_surface *surf)
482 {
483 const struct pan_image *image = pan_image_view_get_plane(iview, 0);
484
485 level += iview->first_level;
486 assert(level < image->layout.nr_slices);
487
488 layer += iview->first_layer;
489
490 bool is_3d = image->layout.dim == MALI_TEXTURE_DIMENSION_3D;
491 const struct pan_image_slice_layout *slice = &image->layout.slices[level];
492 mali_ptr base = image->data.base + image->data.offset;
493
494 if (drm_is_afbc(image->layout.modifier)) {
495 assert(!sample);
496
497 if (is_3d) {
498 ASSERTED unsigned depth = u_minify(image->layout.depth, level);
499 assert(layer < depth);
500 surf->afbc.header =
501 base + slice->offset + (layer * slice->afbc.surface_stride);
502 surf->afbc.body = base + slice->offset + slice->afbc.header_size +
503 (slice->surface_stride * layer);
504 } else {
505 assert(layer < image->layout.array_size);
506 surf->afbc.header =
507 base + panfrost_texture_offset(&image->layout, level, layer, 0);
508 surf->afbc.body = surf->afbc.header + slice->afbc.header_size;
509 }
510 } else {
511 unsigned array_idx = is_3d ? 0 : layer;
512 unsigned surface_idx = is_3d ? layer : sample;
513
514 surf->data = base + panfrost_texture_offset(&image->layout, level,
515 array_idx, surface_idx);
516 }
517 }
518