• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2019-2022 Collabora, Ltd.
3  * Copyright (C) 2018-2019 Alyssa Rosenzweig
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  */
25 
26 #include "util/macros.h"
27 #include "util/u_math.h"
28 #include "pan_texture.h"
29 
30 /* List of supported modifiers, in descending order of preference. AFBC is
31  * faster than u-interleaved tiling which is faster than linear. Within AFBC,
32  * enabling the YUV-like transform is typically a win where possible. */
33 
34 uint64_t pan_best_modifiers[PAN_MODIFIER_COUNT] = {
35         DRM_FORMAT_MOD_ARM_AFBC(
36                 AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
37                 AFBC_FORMAT_MOD_TILED |
38                 AFBC_FORMAT_MOD_SC |
39                 AFBC_FORMAT_MOD_SPARSE |
40                 AFBC_FORMAT_MOD_YTR),
41 
42         DRM_FORMAT_MOD_ARM_AFBC(
43                 AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
44                 AFBC_FORMAT_MOD_TILED |
45                 AFBC_FORMAT_MOD_SC |
46                 AFBC_FORMAT_MOD_SPARSE),
47 
48         DRM_FORMAT_MOD_ARM_AFBC(
49                 AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
50                 AFBC_FORMAT_MOD_SPARSE |
51                 AFBC_FORMAT_MOD_YTR),
52 
53         DRM_FORMAT_MOD_ARM_AFBC(
54                 AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
55                 AFBC_FORMAT_MOD_SPARSE),
56 
57         DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED,
58         DRM_FORMAT_MOD_LINEAR
59 };
60 
61 /* Table of AFBC superblock sizes */
62 static const struct pan_block_size
63 afbc_superblock_sizes[] = {
64         [AFBC_FORMAT_MOD_BLOCK_SIZE_16x16]      = { 16, 16 },
65         [AFBC_FORMAT_MOD_BLOCK_SIZE_32x8]       = { 32,  8 },
66         [AFBC_FORMAT_MOD_BLOCK_SIZE_64x4]       = { 64,  4 },
67 };
68 
69 /*
70  * Given an AFBC modifier, return the superblock size.
71  *
72  * We do not yet have any use cases for multiplanar YCBCr formats with different
73  * superblock sizes on the luma and chroma planes. These formats are unsupported
74  * for now.
75  */
76 struct pan_block_size
panfrost_afbc_superblock_size(uint64_t modifier)77 panfrost_afbc_superblock_size(uint64_t modifier)
78 {
79         unsigned index = (modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK);
80 
81         assert(drm_is_afbc(modifier));
82         assert(index < ARRAY_SIZE(afbc_superblock_sizes));
83 
84         return afbc_superblock_sizes[index];
85 }
86 
87 /*
88  * Given an AFBC modifier, return the width of the superblock.
89  */
90 unsigned
panfrost_afbc_superblock_width(uint64_t modifier)91 panfrost_afbc_superblock_width(uint64_t modifier)
92 {
93         return panfrost_afbc_superblock_size(modifier).width;
94 }
95 
96 /*
97  * Given an AFBC modifier, return the height of the superblock.
98  */
99 unsigned
panfrost_afbc_superblock_height(uint64_t modifier)100 panfrost_afbc_superblock_height(uint64_t modifier)
101 {
102         return panfrost_afbc_superblock_size(modifier).height;
103 }
104 
/*
 * Report whether an AFBC modifier uses "wide blocks", i.e. superblocks wider
 * than 16 pixels. 16 pixels is the minimum (and default) superblock width.
 */
bool
panfrost_afbc_is_wide(uint64_t modifier)
{
        const unsigned superblock_width =
                panfrost_afbc_superblock_width(modifier);

        return superblock_width > 16;
}
115 
116 /*
117  * Given a format, determine the tile size used for u-interleaving. For formats
118  * that are already block compressed, this is 4x4. For all other formats, this
119  * is 16x16, hence the modifier name.
120  */
121 static inline struct pan_block_size
panfrost_u_interleaved_tile_size(enum pipe_format format)122 panfrost_u_interleaved_tile_size(enum pipe_format format)
123 {
124         if (util_format_is_compressed(format))
125                 return (struct pan_block_size) {  4,  4 };
126         else
127                 return (struct pan_block_size) { 16, 16 };
128 }
129 
130 /*
131  * Determine the block size used for interleaving. For u-interleaving, this is
132  * the tile size. For AFBC, this is the superblock size. For linear textures,
133  * this is trivially 1x1.
134  */
135 struct pan_block_size
panfrost_block_size(uint64_t modifier,enum pipe_format format)136 panfrost_block_size(uint64_t modifier, enum pipe_format format)
137 {
138         if (modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED)
139                 return panfrost_u_interleaved_tile_size(format);
140         else if (drm_is_afbc(modifier))
141                 return panfrost_afbc_superblock_size(modifier);
142         else
143                 return (struct pan_block_size) { 1, 1 };
144 }
145 
146 /*
147  * Determine the tile size used by AFBC. This tiles superblocks themselves.
148  * Current GPUs support either 8x8 tiling or no tiling (1x1)
149  */
150 static inline unsigned
pan_afbc_tile_size(uint64_t modifier)151 pan_afbc_tile_size(uint64_t modifier)
152 {
153         return (modifier & AFBC_FORMAT_MOD_TILED) ? 8 : 1;
154 }
155 
156 /*
157  * Determine the number of bytes between header rows for an AFBC image. For an
158  * image with linear headers, this is simply the number of header blocks
159  * (=superblocks) per row times the numbers of bytes per header block. For an
160  * image with linear headers, this is multipled by the number of rows of
161  * header blocks are in a tile together.
162  */
163 uint32_t
pan_afbc_row_stride(uint64_t modifier,uint32_t width)164 pan_afbc_row_stride(uint64_t modifier, uint32_t width)
165 {
166         unsigned block_width = panfrost_afbc_superblock_width(modifier);
167 
168         return (width / block_width) * pan_afbc_tile_size(modifier) *
169                 AFBC_HEADER_BYTES_PER_TILE;
170 }
171 
172 /*
173  * Determine the number of header blocks between header rows. This is equal to
174  * the number of bytes between header rows divided by the bytes per blocks of a
175  * header tile. This is also divided by the tile size to give a "line stride" in
176  * blocks, rather than a real row stride. This is required by Bifrost.
177  */
178 uint32_t
pan_afbc_stride_blocks(uint64_t modifier,uint32_t row_stride_bytes)179 pan_afbc_stride_blocks(uint64_t modifier, uint32_t row_stride_bytes)
180 {
181         return row_stride_bytes /
182                (AFBC_HEADER_BYTES_PER_TILE * pan_afbc_tile_size(modifier));
183 }
184 
185 /*
186  * Determine the required alignment for the body offset of an AFBC image. For
187  * now, this depends only on whether tiling is in use. These minimum alignments
188  * are required on all current GPUs.
189  */
190 static inline uint32_t
pan_afbc_body_align(uint64_t modifier)191 pan_afbc_body_align(uint64_t modifier)
192 {
193         return (modifier & AFBC_FORMAT_MOD_TILED) ? 4096 : 64;
194 }
195 
196 /* Computes sizes for checksumming, which is 8 bytes per 16x16 tile.
197  * Checksumming is believed to be a CRC variant (CRC64 based on the size?).
198  * This feature is also known as "transaction elimination". */
199 
200 #define CHECKSUM_TILE_WIDTH 16
201 #define CHECKSUM_TILE_HEIGHT 16
202 #define CHECKSUM_BYTES_PER_TILE 8
203 
204 unsigned
panfrost_compute_checksum_size(struct pan_image_slice_layout * slice,unsigned width,unsigned height)205 panfrost_compute_checksum_size(
206         struct pan_image_slice_layout *slice,
207         unsigned width,
208         unsigned height)
209 {
210         unsigned tile_count_x = DIV_ROUND_UP(width, CHECKSUM_TILE_WIDTH);
211         unsigned tile_count_y = DIV_ROUND_UP(height, CHECKSUM_TILE_HEIGHT);
212 
213         slice->crc.stride = tile_count_x * CHECKSUM_BYTES_PER_TILE;
214 
215         return slice->crc.stride * tile_count_y;
216 }
217 
218 unsigned
panfrost_get_layer_stride(const struct pan_image_layout * layout,unsigned level)219 panfrost_get_layer_stride(const struct pan_image_layout *layout,
220                           unsigned level)
221 {
222         if (layout->dim != MALI_TEXTURE_DIMENSION_3D)
223                 return layout->array_stride;
224         else if (drm_is_afbc(layout->modifier))
225                 return layout->slices[level].afbc.surface_stride;
226         else
227                 return layout->slices[level].surface_stride;
228 }
229 
230 unsigned
panfrost_get_legacy_stride(const struct pan_image_layout * layout,unsigned level)231 panfrost_get_legacy_stride(const struct pan_image_layout *layout,
232                            unsigned level)
233 {
234         unsigned row_stride = layout->slices[level].row_stride;
235         struct pan_block_size block_size =
236                 panfrost_block_size(layout->modifier, layout->format);
237 
238         if (drm_is_afbc(layout->modifier)) {
239                 unsigned width = u_minify(layout->width, level);
240                 width = ALIGN_POT(width, block_size.width);
241 
242                 return width * util_format_get_blocksize(layout->format);
243         } else {
244                 return row_stride / block_size.height;
245         }
246 }
247 
248 unsigned
panfrost_from_legacy_stride(unsigned legacy_stride,enum pipe_format format,uint64_t modifier)249 panfrost_from_legacy_stride(unsigned legacy_stride,
250                             enum pipe_format format,
251                             uint64_t modifier)
252 {
253         struct pan_block_size block_size =
254                 panfrost_block_size(modifier, format);
255 
256         if (drm_is_afbc(modifier)) {
257                 unsigned width = legacy_stride / util_format_get_blocksize(format);
258 
259                 return pan_afbc_row_stride(modifier, width);
260         } else {
261                 return legacy_stride * block_size.height;
262         }
263 }
264 
265 /* Computes the offset into a texture at a particular level/face. Add to
266  * the base address of a texture to get the address to that level/face */
267 
268 unsigned
panfrost_texture_offset(const struct pan_image_layout * layout,unsigned level,unsigned array_idx,unsigned surface_idx)269 panfrost_texture_offset(const struct pan_image_layout *layout,
270                         unsigned level, unsigned array_idx,
271                         unsigned surface_idx)
272 {
273         return layout->slices[level].offset +
274                (array_idx * layout->array_stride) +
275                (surface_idx * layout->slices[level].surface_stride);
276 }
277 
278 /*
279  * Return the minimum stride alignment in bytes for a given texture format.
280  *
281  * There is no format on any supported Mali with a minimum alignment greater
282  * than 64 bytes, but 64 bytes is the required alignment of all regular formats
283  * in v7 and newer. If this alignment is not met, imprecise faults may be
284  * raised.
285  *
286  * This may not be necessary on older hardware but we enforce it there too for
287  * uniformity. If this poses a problem there, we'll need a solution that can
288  * handle v7 as well.
289  *
290  * Certain non-regular formats require smaller power-of-two alignments.
291  * This requirement could be loosened in the future if there is a compelling
292  * reason, by making this query more precise.
293  */
294 uint32_t
pan_stride_align_B(UNUSED enum pipe_format format)295 pan_stride_align_B(UNUSED enum pipe_format format)
296 {
297         return 64;
298 }
299 
300 bool
pan_is_stride_aligned(enum pipe_format format,uint32_t stride_B)301 pan_is_stride_aligned(enum pipe_format format, uint32_t stride_B)
302 {
303         return (stride_B % pan_stride_align_B(format)) == 0;
304 }
305 
306 bool
pan_image_layout_init(struct pan_image_layout * layout,const struct pan_image_explicit_layout * explicit_layout)307 pan_image_layout_init(struct pan_image_layout *layout,
308                       const struct pan_image_explicit_layout *explicit_layout)
309 {
310         /* Explicit stride only work with non-mipmap, non-array; single-sample
311          * 2D image, and in-band CRC can't be used.
312          */
313         if (explicit_layout &&
314 	    (layout->depth > 1 || layout->nr_samples > 1 ||
315              layout->array_size > 1 || layout->dim != MALI_TEXTURE_DIMENSION_2D ||
316              layout->nr_slices > 1 || layout->crc_mode == PAN_IMAGE_CRC_INBAND))
317                 return false;
318 
319         /* Require both offsets and strides to be aligned to the hardware
320          * requirement. Panfrost allocates offsets and strides like this, so
321          * this requirement is satisfied by any image that was exported from
322          * another process with Panfrost. However, it does restrict imports of
323          * EGL external images.
324          */
325         if (explicit_layout &&
326             !(pan_is_stride_aligned(layout->format, explicit_layout->offset) &&
327               pan_is_stride_aligned(layout->format, explicit_layout->row_stride)))
328                 return false;
329 
330         unsigned fmt_blocksize = util_format_get_blocksize(layout->format);
331 
332         /* MSAA is implemented as a 3D texture with z corresponding to the
333          * sample #, horrifyingly enough */
334 
335         assert(layout->depth == 1 || layout->nr_samples == 1);
336 
337         bool afbc = drm_is_afbc(layout->modifier);
338         bool linear = layout->modifier == DRM_FORMAT_MOD_LINEAR;
339         bool is_3d = layout->dim == MALI_TEXTURE_DIMENSION_3D;
340 
341         unsigned oob_crc_offset = 0;
342         unsigned offset = explicit_layout ? explicit_layout->offset : 0;
343         struct pan_block_size block_size =
344                 panfrost_block_size(layout->modifier, layout->format);
345 
346         unsigned width = layout->width;
347         unsigned height = layout->height;
348         unsigned depth = layout->depth;
349 
350         unsigned align_w = block_size.width;
351         unsigned align_h = block_size.height;
352 
353         /* For tiled AFBC, align to tiles of superblocks (this can be large) */
354         if (afbc) {
355                 align_w *= pan_afbc_tile_size(layout->modifier);
356                 align_h *= pan_afbc_tile_size(layout->modifier);
357         }
358 
359         for (unsigned l = 0; l < layout->nr_slices; ++l) {
360                 struct pan_image_slice_layout *slice = &layout->slices[l];
361 
362                 unsigned effective_width = ALIGN_POT(util_format_get_nblocksx(layout->format, width), align_w);
363                 unsigned effective_height = ALIGN_POT(util_format_get_nblocksy(layout->format, height), align_h);
364 
365                 /* Align levels to cache-line as a performance improvement for
366                  * linear/tiled and as a requirement for AFBC */
367 
368                 offset = ALIGN_POT(offset, 64);
369 
370                 slice->offset = offset;
371 
372                 unsigned row_stride = fmt_blocksize * effective_width * block_size.height;
373 
374                 if (explicit_layout && !afbc) {
375                         /* Make sure the explicit stride is valid */
376                         if (explicit_layout->row_stride < row_stride)
377                                 return false;
378 
379                         row_stride = explicit_layout->row_stride;
380                 } else if (linear) {
381                         /* Keep lines alignment on 64 byte for performance.
382                          *
383                          * Note that this is a multiple of the minimum
384                          * stride alignment, so the hardware requirement is
385                          * satisfied as a result.
386                          */
387                         row_stride = ALIGN_POT(row_stride, 64);
388                 }
389 
390 
391                 assert(pan_is_stride_aligned(layout->format, row_stride) &&
392                        "alignment gauranteed in both code paths");
393 
394                 unsigned slice_one_size = row_stride * (effective_height / block_size.height);
395 
396                 /* Compute AFBC sizes if necessary */
397                 if (afbc) {
398                         slice->row_stride =
399                                 pan_afbc_row_stride(layout->modifier, effective_width);
400                         slice->afbc.header_size =
401                                 ALIGN_POT(slice->row_stride * (effective_height / align_h),
402                                           pan_afbc_body_align(layout->modifier));
403 
404                         if (explicit_layout && explicit_layout->row_stride < slice->row_stride)
405                                 return false;
406 
407                         /* AFBC body size */
408                         slice->afbc.body_size = slice_one_size;
409 
410                         /* 3D AFBC resources have all headers placed at the
411                          * beginning instead of having them split per depth
412                          * level
413                          */
414                         if (is_3d) {
415                                 slice->afbc.surface_stride =
416                                         slice->afbc.header_size;
417                                 slice->afbc.header_size *= depth;
418                                 slice->afbc.body_size *= depth;
419                                 offset += slice->afbc.header_size;
420                         } else {
421                                 slice_one_size += slice->afbc.header_size;
422                                 slice->afbc.surface_stride = slice_one_size;
423                         }
424                 } else {
425                         slice->row_stride = row_stride;
426                 }
427 
428                 unsigned slice_full_size =
429                         slice_one_size * depth * layout->nr_samples;
430 
431                 slice->surface_stride = slice_one_size;
432 
433                 assert(pan_is_stride_aligned(layout->format, slice->surface_stride) &&
434                        "integer multiple of aligned is still aligned, "
435                        "and AFBC header is at least 64 byte aligned");
436 
437                 /* Compute AFBC sizes if necessary */
438 
439                 offset += slice_full_size;
440                 slice->size = slice_full_size;
441 
442                 /* Add a checksum region if necessary */
443                 if (layout->crc_mode != PAN_IMAGE_CRC_NONE) {
444                         slice->crc.size =
445                                 panfrost_compute_checksum_size(slice, width, height);
446 
447                         if (layout->crc_mode == PAN_IMAGE_CRC_INBAND) {
448                                 slice->crc.offset = offset;
449                                 offset += slice->crc.size;
450                                 slice->size += slice->crc.size;
451                         } else {
452                                 slice->crc.offset = oob_crc_offset;
453                                 oob_crc_offset += slice->crc.size;
454                         }
455                 }
456 
457                 width = u_minify(width, 1);
458                 height = u_minify(height, 1);
459                 depth = u_minify(depth, 1);
460         }
461 
462         /* Arrays and cubemaps have the entire miptree duplicated */
463         layout->array_stride = ALIGN_POT(offset, 64);
464         if (explicit_layout)
465                 layout->data_size = offset;
466         else
467                 layout->data_size = ALIGN_POT(layout->array_stride * layout->array_size, 4096);
468         layout->crc_size = oob_crc_offset;
469 
470         return true;
471 }
472 
473 void
pan_iview_get_surface(const struct pan_image_view * iview,unsigned level,unsigned layer,unsigned sample,struct pan_surface * surf)474 pan_iview_get_surface(const struct pan_image_view *iview,
475                       unsigned level, unsigned layer, unsigned sample,
476                       struct pan_surface *surf)
477 {
478         level += iview->first_level;
479         assert(level < iview->image->layout.nr_slices);
480 
481        layer += iview->first_layer;
482 
483         bool is_3d = iview->image->layout.dim == MALI_TEXTURE_DIMENSION_3D;
484         const struct pan_image_slice_layout *slice = &iview->image->layout.slices[level];
485         mali_ptr base = iview->image->data.bo->ptr.gpu + iview->image->data.offset;
486 
487         if (drm_is_afbc(iview->image->layout.modifier)) {
488                 assert(!sample);
489 
490                 if (is_3d) {
491                         ASSERTED unsigned depth = u_minify(iview->image->layout.depth, level);
492                         assert(layer < depth);
493                         surf->afbc.header = base + slice->offset +
494                                            (layer * slice->afbc.surface_stride);
495                         surf->afbc.body = base + slice->offset +
496                                           slice->afbc.header_size +
497                                           (slice->surface_stride * layer);
498                 } else {
499                         assert(layer < iview->image->layout.array_size);
500                         surf->afbc.header = base +
501                                             panfrost_texture_offset(&iview->image->layout,
502                                                                     level, layer, 0);
503                         surf->afbc.body = surf->afbc.header + slice->afbc.header_size;
504                 }
505         } else {
506                 unsigned array_idx = is_3d ? 0 : layer;
507                 unsigned surface_idx = is_3d ? layer : sample;
508 
509                 surf->data = base +
510                              panfrost_texture_offset(&iview->image->layout, level,
511                                                      array_idx, surface_idx);
512         }
513 }
514