• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2019-2022 Collabora, Ltd.
3  * Copyright (C) 2018-2019 Alyssa Rosenzweig
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  */
25 
26 #include "util/log.h"
27 #include "util/macros.h"
28 #include "util/u_math.h"
29 #include "pan_texture.h"
30 
31 /*
32  * List of supported modifiers, in descending order of preference. AFBC is
33  * faster than u-interleaved tiling which is faster than linear. Within AFBC,
34  * enabling the YUV-like transform is typically a win where possible.
35  */
36 uint64_t pan_best_modifiers[PAN_MODIFIER_COUNT] = {
37    DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
38                            AFBC_FORMAT_MOD_TILED | AFBC_FORMAT_MOD_SC |
39                            AFBC_FORMAT_MOD_SPARSE | AFBC_FORMAT_MOD_YTR),
40 
41    DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
42                            AFBC_FORMAT_MOD_TILED | AFBC_FORMAT_MOD_SC |
43                            AFBC_FORMAT_MOD_SPARSE),
44 
45    DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
46                            AFBC_FORMAT_MOD_SPARSE | AFBC_FORMAT_MOD_YTR),
47 
48    DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
49                            AFBC_FORMAT_MOD_SPARSE),
50 
51    DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED,
52    DRM_FORMAT_MOD_LINEAR};
53 
54 /* Table of AFBC superblock sizes */
55 static const struct pan_block_size afbc_superblock_sizes[] = {
56    [AFBC_FORMAT_MOD_BLOCK_SIZE_16x16] = {16, 16},
57    [AFBC_FORMAT_MOD_BLOCK_SIZE_32x8] = {32, 8},
58    [AFBC_FORMAT_MOD_BLOCK_SIZE_64x4] = {64, 4},
59 };
60 
61 /*
62  * Given an AFBC modifier, return the superblock size.
63  *
64  * We do not yet have any use cases for multiplanar YCBCr formats with different
65  * superblock sizes on the luma and chroma planes. These formats are unsupported
66  * for now.
67  */
68 struct pan_block_size
panfrost_afbc_superblock_size(uint64_t modifier)69 panfrost_afbc_superblock_size(uint64_t modifier)
70 {
71    unsigned index = (modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK);
72 
73    assert(drm_is_afbc(modifier));
74    assert(index < ARRAY_SIZE(afbc_superblock_sizes));
75 
76    return afbc_superblock_sizes[index];
77 }
78 
79 /*
80  * Given an AFBC modifier, return the width of the superblock.
81  */
82 unsigned
panfrost_afbc_superblock_width(uint64_t modifier)83 panfrost_afbc_superblock_width(uint64_t modifier)
84 {
85    return panfrost_afbc_superblock_size(modifier).width;
86 }
87 
88 /*
89  * Given an AFBC modifier, return the height of the superblock.
90  */
91 unsigned
panfrost_afbc_superblock_height(uint64_t modifier)92 panfrost_afbc_superblock_height(uint64_t modifier)
93 {
94    return panfrost_afbc_superblock_size(modifier).height;
95 }
96 
/*
 * Report whether an AFBC modifier uses "wide blocks", i.e. superblocks that
 * are wider than 16 pixels. 16 pixels is both the minimum and the default
 * superblock width.
 */
bool
panfrost_afbc_is_wide(uint64_t modifier)
{
   const unsigned default_width = 16;

   return panfrost_afbc_superblock_width(modifier) > default_width;
}
107 
108 /*
109  * Given an AFBC modifier, return the subblock size (subdivision of a
110  * superblock). This is always 4x4 for now as we only support one AFBC
111  * superblock layout.
112  */
113 struct pan_block_size
panfrost_afbc_subblock_size(uint64_t modifier)114 panfrost_afbc_subblock_size(uint64_t modifier)
115 {
116    return (struct pan_block_size){4, 4};
117 }
118 
119 /*
120  * Given a format, determine the tile size used for u-interleaving. For formats
121  * that are already block compressed, this is 4x4. For all other formats, this
122  * is 16x16, hence the modifier name.
123  */
124 static inline struct pan_block_size
panfrost_u_interleaved_tile_size(enum pipe_format format)125 panfrost_u_interleaved_tile_size(enum pipe_format format)
126 {
127    if (util_format_is_compressed(format))
128       return (struct pan_block_size){4, 4};
129    else
130       return (struct pan_block_size){16, 16};
131 }
132 
133 /*
134  * Determine the block size used for interleaving. For u-interleaving, this is
135  * the tile size. For AFBC, this is the superblock size. For linear textures,
136  * this is trivially 1x1.
137  */
138 struct pan_block_size
panfrost_block_size(uint64_t modifier,enum pipe_format format)139 panfrost_block_size(uint64_t modifier, enum pipe_format format)
140 {
141    if (modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED)
142       return panfrost_u_interleaved_tile_size(format);
143    else if (drm_is_afbc(modifier))
144       return panfrost_afbc_superblock_size(modifier);
145    else
146       return (struct pan_block_size){1, 1};
147 }
148 
149 /*
150  * Determine the tile size used by AFBC. This tiles superblocks themselves.
151  * Current GPUs support either 8x8 tiling or no tiling (1x1)
152  */
153 static inline unsigned
pan_afbc_tile_size(uint64_t modifier)154 pan_afbc_tile_size(uint64_t modifier)
155 {
156    return (modifier & AFBC_FORMAT_MOD_TILED) ? 8 : 1;
157 }
158 
159 /*
160  * Determine the number of bytes between header rows for an AFBC image. For an
161  * image with linear headers, this is simply the number of header blocks
162  * (=superblocks) per row times the numbers of bytes per header block. For an
163  * image with tiled headers, this is multipled by the number of rows of
164  * header blocks are in a tile together.
165  */
166 uint32_t
pan_afbc_row_stride(uint64_t modifier,uint32_t width)167 pan_afbc_row_stride(uint64_t modifier, uint32_t width)
168 {
169    unsigned block_width = panfrost_afbc_superblock_width(modifier);
170 
171    return (width / block_width) * pan_afbc_tile_size(modifier) *
172           AFBC_HEADER_BYTES_PER_TILE;
173 }
174 
175 /*
176  * Determine the number of header blocks between header rows. This is equal to
177  * the number of bytes between header rows divided by the bytes per blocks of a
178  * header tile. This is also divided by the tile size to give a "line stride" in
179  * blocks, rather than a real row stride. This is required by Bifrost.
180  */
181 uint32_t
pan_afbc_stride_blocks(uint64_t modifier,uint32_t row_stride_bytes)182 pan_afbc_stride_blocks(uint64_t modifier, uint32_t row_stride_bytes)
183 {
184    return row_stride_bytes /
185           (AFBC_HEADER_BYTES_PER_TILE * pan_afbc_tile_size(modifier));
186 }
187 
/*
 * Required alignment, in bytes, of an image's slice offset. For now the
 * modifier is ignored and every slice is aligned on a 64-byte boundary.
 */
uint32_t
pan_slice_align(uint64_t modifier)
{
   const uint32_t slice_alignment = 64;

   return slice_alignment;
}
196 
197 /*
198  * Determine the required alignment for the body offset of an AFBC image. For
199  * now, this depends only on whether tiling is in use. These minimum alignments
200  * are required on all current GPUs.
201  */
202 uint32_t
pan_afbc_body_align(uint64_t modifier)203 pan_afbc_body_align(uint64_t modifier)
204 {
205    return (modifier & AFBC_FORMAT_MOD_TILED) ? 4096 : 64;
206 }
207 
208 static inline unsigned
format_minimum_alignment(unsigned arch,enum pipe_format format,bool afbc)209 format_minimum_alignment(unsigned arch, enum pipe_format format, bool afbc)
210 {
211    if (afbc)
212       return 16;
213 
214    if (arch < 7)
215       return 64;
216 
217    switch (format) {
218    /* For v7+, NV12/NV21/I420 have a looser alignment requirement of 16 bytes */
219    case PIPE_FORMAT_R8_G8B8_420_UNORM:
220    case PIPE_FORMAT_G8_B8R8_420_UNORM:
221    case PIPE_FORMAT_R8_G8_B8_420_UNORM:
222    case PIPE_FORMAT_R8_B8_G8_420_UNORM:
223       return 16;
224    default:
225       return 64;
226    }
227 }
228 
229 /* Computes sizes for checksumming, which is 8 bytes per 16x16 tile.
230  * Checksumming is believed to be a CRC variant (CRC64 based on the size?).
231  * This feature is also known as "transaction elimination". */
232 
233 #define CHECKSUM_TILE_WIDTH     16
234 #define CHECKSUM_TILE_HEIGHT    16
235 #define CHECKSUM_BYTES_PER_TILE 8
236 
237 unsigned
panfrost_compute_checksum_size(struct pan_image_slice_layout * slice,unsigned width,unsigned height)238 panfrost_compute_checksum_size(struct pan_image_slice_layout *slice,
239                                unsigned width, unsigned height)
240 {
241    unsigned tile_count_x = DIV_ROUND_UP(width, CHECKSUM_TILE_WIDTH);
242    unsigned tile_count_y = DIV_ROUND_UP(height, CHECKSUM_TILE_HEIGHT);
243 
244    slice->crc.stride = tile_count_x * CHECKSUM_BYTES_PER_TILE;
245 
246    return slice->crc.stride * tile_count_y;
247 }
248 
249 unsigned
panfrost_get_layer_stride(const struct pan_image_layout * layout,unsigned level)250 panfrost_get_layer_stride(const struct pan_image_layout *layout, unsigned level)
251 {
252    if (layout->dim != MALI_TEXTURE_DIMENSION_3D)
253       return layout->array_stride;
254    else if (drm_is_afbc(layout->modifier))
255       return layout->slices[level].afbc.surface_stride;
256    else
257       return layout->slices[level].surface_stride;
258 }
259 
260 unsigned
panfrost_get_legacy_stride(const struct pan_image_layout * layout,unsigned level)261 panfrost_get_legacy_stride(const struct pan_image_layout *layout,
262                            unsigned level)
263 {
264    unsigned row_stride = layout->slices[level].row_stride;
265    struct pan_block_size block_size =
266       panfrost_block_size(layout->modifier, layout->format);
267 
268    if (drm_is_afbc(layout->modifier)) {
269       unsigned width = u_minify(layout->width, level);
270       unsigned alignment =
271          block_size.width * pan_afbc_tile_size(layout->modifier);
272 
273       width = ALIGN_POT(width, alignment);
274       return width * util_format_get_blocksize(layout->format);
275    } else {
276       return row_stride / block_size.height;
277    }
278 }
279 
280 unsigned
panfrost_from_legacy_stride(unsigned legacy_stride,enum pipe_format format,uint64_t modifier)281 panfrost_from_legacy_stride(unsigned legacy_stride, enum pipe_format format,
282                             uint64_t modifier)
283 {
284    struct pan_block_size block_size = panfrost_block_size(modifier, format);
285 
286    if (drm_is_afbc(modifier)) {
287       unsigned width = legacy_stride / util_format_get_blocksize(format);
288 
289       return pan_afbc_row_stride(modifier, width);
290    } else {
291       return legacy_stride * block_size.height;
292    }
293 }
294 
295 /* Computes the offset into a texture at a particular level/face. Add to
296  * the base address of a texture to get the address to that level/face */
297 
298 unsigned
panfrost_texture_offset(const struct pan_image_layout * layout,unsigned level,unsigned array_idx,unsigned surface_idx)299 panfrost_texture_offset(const struct pan_image_layout *layout, unsigned level,
300                         unsigned array_idx, unsigned surface_idx)
301 {
302    return layout->slices[level].offset + (array_idx * layout->array_stride) +
303           (surface_idx * layout->slices[level].surface_stride);
304 }
305 
306 bool
pan_image_layout_init(unsigned arch,struct pan_image_layout * layout,const struct pan_image_explicit_layout * explicit_layout)307 pan_image_layout_init(unsigned arch, struct pan_image_layout *layout,
308                       const struct pan_image_explicit_layout *explicit_layout)
309 {
310    /* Explicit stride only work with non-mipmap, non-array, single-sample
311     * 2D image without CRC.
312     */
313    if (explicit_layout &&
314        (layout->depth > 1 || layout->nr_samples > 1 || layout->array_size > 1 ||
315         layout->dim != MALI_TEXTURE_DIMENSION_2D || layout->nr_slices > 1 ||
316         layout->crc))
317       return false;
318 
319    bool afbc = drm_is_afbc(layout->modifier);
320    int align_req = format_minimum_alignment(arch, layout->format, afbc);
321 
322    /* Mandate alignment */
323    if (explicit_layout) {
324       bool rejected = false;
325 
326       int align_mask = align_req - 1;
327 
328       if (arch >= 7) {
329          rejected = ((explicit_layout->offset & align_mask) ||
330                      (explicit_layout->row_stride & align_mask));
331       } else {
332          rejected = (explicit_layout->offset & align_mask);
333       }
334 
335       if (rejected) {
336          mesa_loge(
337             "panfrost: rejecting image due to unsupported offset or stride "
338             "alignment.\n");
339          return false;
340       }
341    }
342 
343    unsigned fmt_blocksize = util_format_get_blocksize(layout->format);
344 
345    /* MSAA is implemented as a 3D texture with z corresponding to the
346     * sample #, horrifyingly enough */
347 
348    assert(layout->depth == 1 || layout->nr_samples == 1);
349 
350    bool linear = layout->modifier == DRM_FORMAT_MOD_LINEAR;
351    bool is_3d = layout->dim == MALI_TEXTURE_DIMENSION_3D;
352 
353    unsigned offset = explicit_layout ? explicit_layout->offset : 0;
354    struct pan_block_size block_size =
355       panfrost_block_size(layout->modifier, layout->format);
356 
357    unsigned width = layout->width;
358    unsigned height = layout->height;
359    unsigned depth = layout->depth;
360 
361    unsigned align_w = block_size.width;
362    unsigned align_h = block_size.height;
363 
364    /* For tiled AFBC, align to tiles of superblocks (this can be large) */
365    if (afbc) {
366       align_w *= pan_afbc_tile_size(layout->modifier);
367       align_h *= pan_afbc_tile_size(layout->modifier);
368    }
369 
370    for (unsigned l = 0; l < layout->nr_slices; ++l) {
371       struct pan_image_slice_layout *slice = &layout->slices[l];
372 
373       unsigned effective_width =
374          ALIGN_POT(util_format_get_nblocksx(layout->format, width), align_w);
375       unsigned effective_height =
376          ALIGN_POT(util_format_get_nblocksy(layout->format, height), align_h);
377 
378       /* Align levels to cache-line as a performance improvement for
379        * linear/tiled and as a requirement for AFBC */
380 
381       offset = ALIGN_POT(offset, pan_slice_align(layout->modifier));
382 
383       slice->offset = offset;
384 
385       unsigned row_stride = fmt_blocksize * effective_width * block_size.height;
386 
387       /* On v7+ row_stride and offset alignment requirement are equal */
388       if (arch >= 7) {
389          row_stride = ALIGN_POT(row_stride, align_req);
390       }
391 
392       if (explicit_layout && !afbc) {
393          /* Make sure the explicit stride is valid */
394          if (explicit_layout->row_stride < row_stride) {
395             mesa_loge("panfrost: rejecting image due to invalid row stride.\n");
396             return false;
397          }
398 
399          row_stride = explicit_layout->row_stride;
400       } else if (linear) {
401          /* Keep lines alignment on 64 byte for performance */
402          row_stride = ALIGN_POT(row_stride, 64);
403       }
404 
405       unsigned slice_one_size =
406          row_stride * (effective_height / block_size.height);
407 
408       /* Compute AFBC sizes if necessary */
409       if (afbc) {
410          slice->row_stride =
411             pan_afbc_row_stride(layout->modifier, effective_width);
412          slice->afbc.stride = effective_width / block_size.width;
413          slice->afbc.nr_blocks =
414             slice->afbc.stride * (effective_height / block_size.height);
415          slice->afbc.header_size =
416             ALIGN_POT(slice->row_stride * (effective_height / align_h),
417                       pan_afbc_body_align(layout->modifier));
418 
419          if (explicit_layout &&
420              explicit_layout->row_stride < slice->row_stride) {
421             mesa_loge("panfrost: rejecting image due to invalid row stride.\n");
422             return false;
423          }
424 
425          /* AFBC body size */
426          slice->afbc.body_size = slice_one_size;
427 
428          /* 3D AFBC resources have all headers placed at the
429           * beginning instead of having them split per depth
430           * level
431           */
432          if (is_3d) {
433             slice->afbc.surface_stride = slice->afbc.header_size;
434             slice->afbc.header_size *= depth;
435             slice->afbc.body_size *= depth;
436             offset += slice->afbc.header_size;
437          } else {
438             slice_one_size += slice->afbc.header_size;
439             slice->afbc.surface_stride = slice_one_size;
440          }
441       } else {
442          slice->row_stride = row_stride;
443       }
444 
445       unsigned slice_full_size = slice_one_size * depth * layout->nr_samples;
446 
447       slice->surface_stride = slice_one_size;
448 
449       /* Compute AFBC sizes if necessary */
450 
451       offset += slice_full_size;
452       slice->size = slice_full_size;
453 
454       /* Add a checksum region if necessary */
455       if (layout->crc) {
456          slice->crc.size = panfrost_compute_checksum_size(slice, width, height);
457 
458          slice->crc.offset = offset;
459          offset += slice->crc.size;
460          slice->size += slice->crc.size;
461       }
462 
463       width = u_minify(width, 1);
464       height = u_minify(height, 1);
465       depth = u_minify(depth, 1);
466    }
467 
468    /* Arrays and cubemaps have the entire miptree duplicated */
469    layout->array_stride = ALIGN_POT(offset, 64);
470    if (explicit_layout)
471       layout->data_size = offset;
472    else
473       layout->data_size =
474          ALIGN_POT(layout->array_stride * layout->array_size, 4096);
475 
476    return true;
477 }
478 
479 void
pan_iview_get_surface(const struct pan_image_view * iview,unsigned level,unsigned layer,unsigned sample,struct pan_surface * surf)480 pan_iview_get_surface(const struct pan_image_view *iview, unsigned level,
481                       unsigned layer, unsigned sample, struct pan_surface *surf)
482 {
483    const struct pan_image *image = pan_image_view_get_plane(iview, 0);
484 
485    level += iview->first_level;
486    assert(level < image->layout.nr_slices);
487 
488    layer += iview->first_layer;
489 
490    bool is_3d = image->layout.dim == MALI_TEXTURE_DIMENSION_3D;
491    const struct pan_image_slice_layout *slice = &image->layout.slices[level];
492    mali_ptr base = image->data.base + image->data.offset;
493 
494    if (drm_is_afbc(image->layout.modifier)) {
495       assert(!sample);
496 
497       if (is_3d) {
498          ASSERTED unsigned depth = u_minify(image->layout.depth, level);
499          assert(layer < depth);
500          surf->afbc.header =
501             base + slice->offset + (layer * slice->afbc.surface_stride);
502          surf->afbc.body = base + slice->offset + slice->afbc.header_size +
503                            (slice->surface_stride * layer);
504       } else {
505          assert(layer < image->layout.array_size);
506          surf->afbc.header =
507             base + panfrost_texture_offset(&image->layout, level, layer, 0);
508          surf->afbc.body = surf->afbc.header + slice->afbc.header_size;
509       }
510    } else {
511       unsigned array_idx = is_3d ? 0 : layer;
512       unsigned surface_idx = is_3d ? layer : sample;
513 
514       surf->data = base + panfrost_texture_offset(&image->layout, level,
515                                                   array_idx, surface_idx);
516    }
517 }
518