1 /*
2 * Copyright (C) 2008 VMware, Inc.
3 * Copyright (C) 2014 Broadcom
4 * Copyright (C) 2018-2019 Alyssa Rosenzweig
5 * Copyright (C) 2019-2020 Collabora, Ltd.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * SOFTWARE.
25 *
26 */
27
28 #include "util/macros.h"
29 #include "util/u_math.h"
30 #include "pan_texture.h"
31 #include "panfrost-quirks.h"
32
33 /* Generates a texture descriptor. Ideally, descriptors are immutable after the
34 * texture is created, so we can keep these hanging around in GPU memory in a
35 * dedicated BO and not have to worry. In practice there are some minor gotchas
36 * with this (the driver sometimes will change the format of a texture on the
37 * fly for compression) but it's fast enough to just regenerate the descriptor
38 * in those cases, rather than monkeypatching at drawtime. A texture descriptor
39 * consists of a 32-byte header followed by pointers.
40 */
41
42 /* List of supported modifiers, in descending order of preference. AFBC is
43 * faster than u-interleaved tiling which is faster than linear. Within AFBC,
44 * enabling the YUV-like transform is typically a win where possible. */
45
uint64_t pan_best_modifiers[PAN_MODIFIER_COUNT] = {
        /* AFBC with 16x16 superblocks, sparse layout, and the YUV-like
         * transform (YTR) enabled — the fastest option when YTR applies */
        DRM_FORMAT_MOD_ARM_AFBC(
                AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
                AFBC_FORMAT_MOD_SPARSE |
                AFBC_FORMAT_MOD_YTR),

        /* Same AFBC layout without YTR, for formats where the transform
         * cannot be used */
        DRM_FORMAT_MOD_ARM_AFBC(
                AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
                AFBC_FORMAT_MOD_SPARSE),

        /* 16x16 u-interleaved tiling, then plain linear as last resort */
        DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED,
        DRM_FORMAT_MOD_LINEAR
};
59
60 /* Map modifiers to mali_texture_layout for packing in a texture descriptor */
61
62 static enum mali_texture_layout
panfrost_modifier_to_layout(uint64_t modifier)63 panfrost_modifier_to_layout(uint64_t modifier)
64 {
65 if (drm_is_afbc(modifier))
66 return MALI_TEXTURE_LAYOUT_AFBC;
67 else if (modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED)
68 return MALI_TEXTURE_LAYOUT_TILED;
69 else if (modifier == DRM_FORMAT_MOD_LINEAR)
70 return MALI_TEXTURE_LAYOUT_LINEAR;
71 else
72 unreachable("Invalid modifer");
73 }
74
75 /* Check if we need to set a custom stride by computing the "expected"
76 * stride and comparing it to what the user actually wants. Only applies
77 * to linear textures, since tiled/compressed textures have strict
78 * alignment requirements for their strides as it is */
79
80 static bool
panfrost_needs_explicit_stride(struct panfrost_slice * slices,uint16_t width,unsigned first_level,unsigned last_level,unsigned bytes_per_pixel)81 panfrost_needs_explicit_stride(
82 struct panfrost_slice *slices,
83 uint16_t width,
84 unsigned first_level, unsigned last_level,
85 unsigned bytes_per_pixel)
86 {
87 for (unsigned l = first_level; l <= last_level; ++l) {
88 unsigned actual = slices[l].stride;
89 unsigned expected = u_minify(width, l) * bytes_per_pixel;
90
91 if (actual != expected)
92 return true;
93 }
94
95 return false;
96 }
97
/* Adaptive Scalable Texture Compression (ASTC) corresponds to just a few
 * texture types in the hardware, but in fact can be parametrized to have
 * various widths and heights for the so-called "stretch factor". It turns out
 * these parameters are stuffed in the bottom bits of the payload pointers.
 * This function computes these magic stuffing constants based on the ASTC
 * format in use. The constant in a given dimension is 3 bits, and two are
 * stored side-by-side for each active dimension.
 */
106
/* Map an ASTC block dimension (4..12 texels) to its 3-bit "stretch factor"
 * encoding. The value saturates at 11, so dimensions 11 and 12 both encode
 * as 7; inputs outside [4, 12] are rejected by the assert. */

static unsigned
panfrost_astc_stretch(unsigned dim)
{
        assert(dim >= 4 && dim <= 12);

        unsigned clamped = (dim > 11) ? 11 : dim;
        return clamped - 4;
}
113
114 /* Texture addresses are tagged with information about compressed formats.
115 * AFBC uses a bit for whether the colorspace transform is enabled (RGB and
116 * RGBA only).
117 * For ASTC, this is a "stretch factor" encoding the block size. */
118
119 static unsigned
panfrost_compression_tag(const struct util_format_description * desc,uint64_t modifier)120 panfrost_compression_tag(
121 const struct util_format_description *desc, uint64_t modifier)
122 {
123 if (drm_is_afbc(modifier))
124 return (modifier & AFBC_FORMAT_MOD_YTR) ? 1 : 0;
125 else if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC)
126 return (panfrost_astc_stretch(desc->block.height) << 3) |
127 panfrost_astc_stretch(desc->block.width);
128 else
129 return 0;
130 }
131
132
133 /* Cubemaps have 6 faces as "layers" in between each actual layer. We
134 * need to fix this up. TODO: logic wrong in the asserted out cases ...
135 * can they happen, perhaps from cubemap arrays? */
136
/* Split combined cube "layer" indices (6 faces per layer) into separate
 * face and layer ranges, in place. The assert documents the cases the
 * later iteration logic can handle: either a single layer, or full
 * face ranges (0..5) on every layer. */

static void
panfrost_adjust_cube_dimensions(
                unsigned *first_face, unsigned *last_face,
                unsigned *first_layer, unsigned *last_layer)
{
        unsigned face_lo = *first_layer % 6;
        unsigned face_hi = *last_layer % 6;
        unsigned layer_lo = *first_layer / 6;
        unsigned layer_hi = *last_layer / 6;

        *first_face = face_lo;
        *last_face = face_hi;
        *first_layer = layer_lo;
        *last_layer = layer_hi;

        assert((layer_lo == layer_hi) || (face_lo == 0 && face_hi == 5));
}
149
150 /* Following the texture descriptor is a number of pointers. How many? */
151
152 static unsigned
panfrost_texture_num_elements(unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer,unsigned nr_samples,bool is_cube,bool manual_stride)153 panfrost_texture_num_elements(
154 unsigned first_level, unsigned last_level,
155 unsigned first_layer, unsigned last_layer,
156 unsigned nr_samples,
157 bool is_cube, bool manual_stride)
158 {
159 unsigned first_face = 0, last_face = 0;
160
161 if (is_cube) {
162 panfrost_adjust_cube_dimensions(&first_face, &last_face,
163 &first_layer, &last_layer);
164 }
165
166 unsigned levels = 1 + last_level - first_level;
167 unsigned layers = 1 + last_layer - first_layer;
168 unsigned faces = 1 + last_face - first_face;
169 unsigned num_elements = levels * layers * faces * MAX2(nr_samples, 1);
170
171 if (manual_stride)
172 num_elements *= 2;
173
174 return num_elements;
175 }
176
177 /* Conservative estimate of the size of the texture payload a priori.
178 * Average case, size equal to the actual size. Worst case, off by 2x (if
179 * a manual stride is not needed on a linear texture). Returned value
180 * must be greater than or equal to the actual size, so it's safe to use
181 * as an allocation amount */
182
183 unsigned
panfrost_estimate_texture_payload_size(unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer,unsigned nr_samples,enum mali_texture_dimension dim,uint64_t modifier)184 panfrost_estimate_texture_payload_size(
185 unsigned first_level, unsigned last_level,
186 unsigned first_layer, unsigned last_layer,
187 unsigned nr_samples,
188 enum mali_texture_dimension dim, uint64_t modifier)
189 {
190 /* Assume worst case */
191 unsigned manual_stride = (modifier == DRM_FORMAT_MOD_LINEAR);
192
193 unsigned elements = panfrost_texture_num_elements(
194 first_level, last_level,
195 first_layer, last_layer,
196 nr_samples,
197 dim == MALI_TEXTURE_DIMENSION_CUBE, manual_stride);
198
199 return sizeof(mali_ptr) * elements;
200 }
201
/* Unless set explicitly, the line stride for block-based formats is
 * calculated as (ceil(width / block_width) * block_size). As a special case,
 * it is left zero if there is only a single block vertically. So, we have a
 * helper to extract the dimensions of a block-based format and use that to
 * calculate the line stride accordingly.
 */
208
209 static unsigned
panfrost_block_dim(uint64_t modifier,bool width,unsigned plane)210 panfrost_block_dim(uint64_t modifier, bool width, unsigned plane)
211 {
212 if (!drm_is_afbc(modifier)) {
213 assert(modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED);
214 return 16;
215 }
216
217 switch (modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK) {
218 case AFBC_FORMAT_MOD_BLOCK_SIZE_16x16:
219 return 16;
220 case AFBC_FORMAT_MOD_BLOCK_SIZE_32x8:
221 return width ? 32 : 8;
222 case AFBC_FORMAT_MOD_BLOCK_SIZE_64x4:
223 return width ? 64 : 4;
224 case AFBC_FORMAT_MOD_BLOCK_SIZE_32x8_64x4:
225 return plane ? (width ? 64 : 4) : (width ? 32 : 8);
226 default:
227 unreachable("Invalid AFBC block size");
228 }
229 }
230
231 static unsigned
panfrost_nonlinear_stride(uint64_t modifier,unsigned bytes_per_block,unsigned pixels_per_block,unsigned width,unsigned height,bool plane)232 panfrost_nonlinear_stride(uint64_t modifier,
233 unsigned bytes_per_block,
234 unsigned pixels_per_block,
235 unsigned width,
236 unsigned height,
237 bool plane)
238 {
239 unsigned block_w = panfrost_block_dim(modifier, true, plane);
240 unsigned block_h = panfrost_block_dim(modifier, false, plane);
241
242 /* Calculate block size. Ensure the division happens only at the end to
243 * avoid rounding errors if bytes per block < pixels per block */
244
245 unsigned block_size = (block_w * block_h * bytes_per_block)
246 / pixels_per_block;
247
248 if (height <= block_h)
249 return 0;
250 else
251 return DIV_ROUND_UP(width, block_w) * block_size;
252 }
253
/* Emit the pointer payload following a (Midgard-style) texture descriptor.
 * Writes one tagged GPU address per (layer, level, face, sample) surface
 * into `payload`, and — when manual_stride is set — a stride word after
 * each address. Iteration order is layers outermost, then levels, faces,
 * samples; the hardware expects exactly this interleaving. */

static void
panfrost_emit_texture_payload(
        mali_ptr *payload,
        const struct util_format_description *desc,
        enum mali_texture_dimension dim,
        uint64_t modifier,
        unsigned width, unsigned height,
        unsigned first_level, unsigned last_level,
        unsigned first_layer, unsigned last_layer,
        unsigned nr_samples,
        unsigned cube_stride,
        bool manual_stride,
        mali_ptr base,
        struct panfrost_slice *slices)
{
        /* Stuff the compression parameters (AFBC YTR bit / ASTC stretch
         * factors) into the low bits of the base address */
        base |= panfrost_compression_tag(desc, modifier);

        /* Inject the addresses in, interleaving array indices, mip levels,
         * cube faces, and strides in that order */

        unsigned first_face = 0, last_face = 0, face_mult = 1;

        if (dim == MALI_TEXTURE_DIMENSION_CUBE) {
                face_mult = 6;
                panfrost_adjust_cube_dimensions(&first_face, &last_face, &first_layer, &last_layer);
        }

        /* Non-MSAA textures still emit one sample's worth of pointers */
        nr_samples = MAX2(nr_samples, 1);

        unsigned idx = 0;

        for (unsigned w = first_layer; w <= last_layer; ++w) {
                for (unsigned l = first_level; l <= last_level; ++l) {
                        for (unsigned f = first_face; f <= last_face; ++f) {
                                for (unsigned s = 0; s < nr_samples; ++s) {
                                        payload[idx++] = base + panfrost_texture_offset(
                                                        slices, dim == MALI_TEXTURE_DIMENSION_3D,
                                                        cube_stride, l, w * face_mult + f, s);

                                        /* Linear levels use their recorded stride; tiled/AFBC
                                         * levels use the computed block-based stride */
                                        if (manual_stride) {
                                                payload[idx++] = (modifier == DRM_FORMAT_MOD_LINEAR) ?
                                                        slices[l].stride :
                                                        panfrost_nonlinear_stride(modifier,
                                                                        MAX2(desc->block.bits / 8, 1),
                                                                        desc->block.width * desc->block.height,
                                                                        u_minify(width, l),
                                                                        u_minify(height, l), false);
                                        }
                                }
                        }
                }
        }
}
307
/* Emit the surface payload for arch v7 (Valhall-era) descriptors. Differs
 * from the older path in two ways: the loop nesting puts mip levels
 * innermost (layers, faces, samples, then levels), and every surface is
 * unconditionally followed by a 64-bit word packing the layer stride in
 * the high half and the line stride in the low half. */

static void
panfrost_emit_texture_payload_v7(mali_ptr *payload,
                                 const struct util_format_description *desc,
                                 enum mali_texture_dimension dim,
                                 uint64_t modifier,
                                 unsigned width, unsigned height,
                                 unsigned first_level, unsigned last_level,
                                 unsigned first_layer, unsigned last_layer,
                                 unsigned nr_samples,
                                 unsigned cube_stride,
                                 mali_ptr base,
                                 struct panfrost_slice *slices)
{
        /* Tag the base address with the compression parameters (AFBC YTR
         * bit / ASTC stretch factors) */
        base |= panfrost_compression_tag(desc, modifier);

        /* Inject the addresses in, interleaving array indices, mip levels,
         * cube faces, and strides in that order */

        unsigned first_face = 0, last_face = 0, face_mult = 1;

        if (dim == MALI_TEXTURE_DIMENSION_CUBE) {
                face_mult = 6;
                panfrost_adjust_cube_dimensions(&first_face, &last_face, &first_layer, &last_layer);
        }

        nr_samples = MAX2(nr_samples, 1);

        unsigned idx = 0;
        bool is_3d = dim == MALI_TEXTURE_DIMENSION_3D;
        bool is_linear = modifier == DRM_FORMAT_MOD_LINEAR;

        /* Multisampled cubemaps are not expected here; the iteration below
         * assumes at most one of the two dimensions is in play */
        assert(nr_samples == 1 || face_mult == 1);

        for (unsigned w = first_layer; w <= last_layer; ++w) {
                for (unsigned f = first_face; f <= last_face; ++f) {
                        for (unsigned s = 0; s < nr_samples; ++s) {
                                for (unsigned l = first_level; l <= last_level; ++l) {
                                        payload[idx++] =
                                                base +
                                                panfrost_texture_offset(slices, is_3d,
                                                                        cube_stride, l,
                                                                        w * face_mult + f, s);

                                        unsigned line_stride =
                                                is_linear ?
                                                slices[l].stride :
                                                panfrost_nonlinear_stride(modifier,
                                                                          MAX2(desc->block.bits / 8, 1),
                                                                          desc->block.width * desc->block.height,
                                                                          u_minify(width, l),
                                                                          u_minify(height, l), false);
                                        /* NOTE(review): layer stride is left 0 per the FIXME;
                                         * presumably array/3D textures need a real value here —
                                         * confirm against hardware docs before relying on it */
                                        unsigned layer_stride = 0; /* FIXME */
                                        payload[idx++] = ((uint64_t)layer_stride << 32) | line_stride;
                                }
                        }
                }
        }
}
366
/* Pack a complete Midgard texture descriptor into `out`: the fixed-size
 * MIDGARD_TEXTURE header followed immediately by the surface pointer
 * payload. `out` must be sized for the header plus the payload (see
 * panfrost_estimate_texture_payload_size). Width/height/depth are the
 * level-0 dimensions; the header stores them minified to first_level. */

void
panfrost_new_texture(
        void *out,
        uint16_t width, uint16_t height,
        uint16_t depth, uint16_t array_size,
        enum pipe_format format,
        enum mali_texture_dimension dim,
        uint64_t modifier,
        unsigned first_level, unsigned last_level,
        unsigned first_layer, unsigned last_layer,
        unsigned nr_samples,
        unsigned cube_stride,
        unsigned swizzle,
        mali_ptr base,
        struct panfrost_slice *slices)
{
        const struct util_format_description *desc =
                util_format_description(format);

        unsigned bytes_per_pixel = util_format_get_blocksize(format);

        /* Strides are only emitted when linear layout deviates from the
         * tightly-packed stride at some level */
        bool manual_stride = (modifier == DRM_FORMAT_MOD_LINEAR)
                && panfrost_needs_explicit_stride(slices, width,
                                first_level, last_level, bytes_per_pixel);

        pan_pack(out, MIDGARD_TEXTURE, cfg) {
                cfg.width = u_minify(width, first_level);
                cfg.height = u_minify(height, first_level);
                cfg.depth = u_minify(depth, first_level);
                cfg.array_size = array_size;
                cfg.format = panfrost_pipe_format_v6[format].hw;
                cfg.dimension = dim;
                cfg.texel_ordering = panfrost_modifier_to_layout(modifier);
                cfg.manual_stride = manual_stride;
                /* Hardware counts levels beyond the first, not total levels */
                cfg.levels = last_level - first_level;
                cfg.swizzle = swizzle;
        };

        /* Payload starts right after the fixed-size header */
        panfrost_emit_texture_payload(
                (mali_ptr *) (out + MALI_MIDGARD_TEXTURE_LENGTH),
                desc,
                dim,
                modifier,
                width, height,
                first_level, last_level,
                first_layer, last_layer,
                nr_samples,
                cube_stride,
                manual_stride,
                base,
                slices);
}
419
/* Pack a Bifrost texture descriptor. Unlike Midgard, the surface payload
 * lives in a separately-allocated buffer (`payload`), referenced from the
 * descriptor by GPU address; arch >= 7 uses the v7 payload layout with
 * interleaved stride words, while older Bifrost always emits explicit
 * strides. */

void
panfrost_new_texture_bifrost(
        const struct panfrost_device *dev,
        struct mali_bifrost_texture_packed *out,
        uint16_t width, uint16_t height,
        uint16_t depth, uint16_t array_size,
        enum pipe_format format,
        enum mali_texture_dimension dim,
        uint64_t modifier,
        unsigned first_level, unsigned last_level,
        unsigned first_layer, unsigned last_layer,
        unsigned nr_samples,
        unsigned cube_stride,
        unsigned swizzle,
        mali_ptr base,
        struct panfrost_slice *slices,
        const struct panfrost_ptr *payload)
{
        const struct util_format_description *desc =
                util_format_description(format);

        /* v7+ packs line/layer strides next to every surface pointer */
        if (dev->arch >= 7) {
                panfrost_emit_texture_payload_v7(payload->cpu,
                                                 desc,
                                                 dim,
                                                 modifier,
                                                 width, height,
                                                 first_level, last_level,
                                                 first_layer, last_layer,
                                                 nr_samples,
                                                 cube_stride,
                                                 base,
                                                 slices);
        } else {
                panfrost_emit_texture_payload(payload->cpu,
                                              desc,
                                              dim,
                                              modifier,
                                              width, height,
                                              first_level, last_level,
                                              first_layer, last_layer,
                                              nr_samples,
                                              cube_stride,
                                              true, /* Stride explicit on Bifrost */
                                              base,
                                              slices);
        }

        pan_pack(out, BIFROST_TEXTURE, cfg) {
                cfg.dimension = dim;
                cfg.format = dev->formats[format].hw;

                cfg.width = u_minify(width, first_level);
                cfg.height = u_minify(height, first_level);
                cfg.swizzle = swizzle;
                cfg.texel_ordering = panfrost_modifier_to_layout(modifier);
                /* Levels beyond the first, per hardware convention */
                cfg.levels = last_level - first_level;
                cfg.array_size = array_size;
                /* GPU address of the payload emitted above */
                cfg.surfaces = payload->gpu;

                /* We specify API-level LOD clamps in the sampler descriptor
                 * and use these clamps simply for bounds checking */
                cfg.minimum_lod = FIXED_16(0, false);
                cfg.maximum_lod = FIXED_16(cfg.levels, false);
        }
}
486
487 /* Computes sizes for checksumming, which is 8 bytes per 16x16 tile.
488 * Checksumming is believed to be a CRC variant (CRC64 based on the size?).
489 * This feature is also known as "transaction elimination". */
490
491 #define CHECKSUM_TILE_WIDTH 16
492 #define CHECKSUM_TILE_HEIGHT 16
493 #define CHECKSUM_BYTES_PER_TILE 8
494
495 unsigned
panfrost_compute_checksum_size(struct panfrost_slice * slice,unsigned width,unsigned height)496 panfrost_compute_checksum_size(
497 struct panfrost_slice *slice,
498 unsigned width,
499 unsigned height)
500 {
501 unsigned aligned_width = ALIGN_POT(width, CHECKSUM_TILE_WIDTH);
502 unsigned aligned_height = ALIGN_POT(height, CHECKSUM_TILE_HEIGHT);
503
504 unsigned tile_count_x = aligned_width / CHECKSUM_TILE_WIDTH;
505 unsigned tile_count_y = aligned_height / CHECKSUM_TILE_HEIGHT;
506
507 slice->checksum_stride = tile_count_x * CHECKSUM_BYTES_PER_TILE;
508
509 return slice->checksum_stride * tile_count_y;
510 }
511
512 unsigned
panfrost_get_layer_stride(struct panfrost_slice * slices,bool is_3d,unsigned cube_stride,unsigned level)513 panfrost_get_layer_stride(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level)
514 {
515 return is_3d ? slices[level].size0 : cube_stride;
516 }
517
518 /* Computes the offset into a texture at a particular level/face. Add to
519 * the base address of a texture to get the address to that level/face */
520
521 unsigned
panfrost_texture_offset(struct panfrost_slice * slices,bool is_3d,unsigned cube_stride,unsigned level,unsigned face,unsigned sample)522 panfrost_texture_offset(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level, unsigned face, unsigned sample)
523 {
524 unsigned layer_stride = panfrost_get_layer_stride(slices, is_3d, cube_stride, level);
525 return slices[level].offset + (face * layer_stride) + (sample * slices[level].size0);
526 }
527