• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2018 Rob Clark <robclark@freedesktop.org>
3  * Copyright © 2018 Google, Inc.
4  * SPDX-License-Identifier: MIT
5  *
6  * Authors:
7  *    Rob Clark <robclark@freedesktop.org>
8  */
9 
10 #define FD_BO_NO_HARDPIN 1
11 
12 #include "drm-uapi/drm_fourcc.h"
13 
14 #include "a6xx/fd6_blitter.h"
15 #include "fd6_resource.h"
16 #include "fdl/fd6_format_table.h"
17 #include "common/freedreno_lrz.h"
18 #include "common/freedreno_ubwc.h"
19 
20 #include "a6xx.xml.h"
21 
22 /* A subset of the valid tiled formats can be compressed.  We do
23  * already require tiled in order to be compressed, but just because
24  * it can be tiled doesn't mean it can be compressed.
25  */
26 static bool
ok_ubwc_format(struct pipe_screen * pscreen,enum pipe_format pfmt,unsigned nr_samples)27 ok_ubwc_format(struct pipe_screen *pscreen, enum pipe_format pfmt, unsigned nr_samples)
28 {
29    const struct fd_dev_info *info = fd_screen(pscreen)->info;
30 
31    switch (pfmt) {
32    case PIPE_FORMAT_Z24X8_UNORM:
33       /* MSAA+UBWC does not work without FMT6_Z24_UINT_S8_UINT: */
34       return info->a6xx.has_z24uint_s8uint || (nr_samples <= 1);
35 
36    case PIPE_FORMAT_X24S8_UINT:
37    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
38       /* We can't sample stencil with UBWC on a630, and we may need to be able
39        * to sample stencil at some point.  We can't just use
40        * fd_resource_uncompress() at the point of stencil sampling because
41        * that itself uses stencil sampling in the fd_blitter_blit path.
42        */
43       return info->a6xx.has_z24uint_s8uint;
44 
45    case PIPE_FORMAT_R8_G8B8_420_UNORM:
46       /* The difference between NV12 and R8_G8B8_420_UNORM is only where the
47        * conversion to RGB happens, with the latter it happens _after_ the
48        * texture samp instruction.  But dri2_get_mapping_by_fourcc() doesn't
49        * know this, so it asks for NV12 when it really meant to ask for
50        * R8_G8B8_420_UNORM.  Just treat them the same here to work around it:
51        */
52    case PIPE_FORMAT_NV12:
53       return true;
54 
55    default:
56       break;
57    }
58 
59    /* In copy_format, we treat snorm as unorm to avoid clamping.  But snorm
60     * and unorm are UBWC incompatible for special values such as all 0's or
61     * all 1's prior to a740.  Disable UBWC for snorm.
62     */
63    if (util_format_is_snorm(pfmt) &&
64        !info->a7xx.ubwc_unorm_snorm_int_compatible)
65       return false;
66 
67    /* A690 seem to have broken UBWC for depth/stencil, it requires
68     * depth flushing where we cannot realistically place it, like between
69     * ordinary draw calls writing read/depth. WSL blob seem to use ubwc
70     * sometimes for depth/stencil.
71     */
72    if (info->a6xx.broken_ds_ubwc_quirk &&
73        util_format_is_depth_or_stencil(pfmt))
74       return false;
75 
76    switch (fd6_color_format(pfmt, TILE6_LINEAR)) {
77    case FMT6_10_10_10_2_UINT:
78    case FMT6_10_10_10_2_UNORM_DEST:
79    case FMT6_11_11_10_FLOAT:
80    case FMT6_16_FLOAT:
81    case FMT6_16_16_16_16_FLOAT:
82    case FMT6_16_16_16_16_SINT:
83    case FMT6_16_16_16_16_UINT:
84    case FMT6_16_16_FLOAT:
85    case FMT6_16_16_SINT:
86    case FMT6_16_16_UINT:
87    case FMT6_16_SINT:
88    case FMT6_16_UINT:
89    case FMT6_32_32_32_32_SINT:
90    case FMT6_32_32_32_32_UINT:
91    case FMT6_32_32_SINT:
92    case FMT6_32_32_UINT:
93    case FMT6_5_6_5_UNORM:
94    case FMT6_5_5_5_1_UNORM:
95    case FMT6_8_8_8_8_SINT:
96    case FMT6_8_8_8_8_UINT:
97    case FMT6_8_8_8_8_UNORM:
98    case FMT6_8_8_8_X8_UNORM:
99    case FMT6_8_8_SINT:
100    case FMT6_8_8_UINT:
101    case FMT6_8_8_UNORM:
102    case FMT6_Z24_UNORM_S8_UINT:
103    case FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
104       return true;
105    case FMT6_8_UNORM:
106       return info->a6xx.has_8bpp_ubwc;
107    default:
108       return false;
109    }
110 }
111 
112 static bool
can_do_ubwc(struct pipe_resource * prsc)113 can_do_ubwc(struct pipe_resource *prsc)
114 {
115    /* limit things to simple single level 2d for now: */
116    if ((prsc->depth0 != 1) || (prsc->array_size != 1) ||
117        (prsc->last_level != 0))
118       return false;
119    if (prsc->target != PIPE_TEXTURE_2D)
120       return false;
121    if (!ok_ubwc_format(prsc->screen, prsc->format, prsc->nr_samples))
122       return false;
123    return true;
124 }
125 
126 static bool
is_z24s8(enum pipe_format format)127 is_z24s8(enum pipe_format format)
128 {
129    switch (format) {
130    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
131    case PIPE_FORMAT_Z24X8_UNORM:
132    case PIPE_FORMAT_X24S8_UINT:
133    case PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
134       return true;
135    default:
136       return false;
137    }
138 }
139 
140 static bool
valid_ubwc_format_cast(struct fd_resource * rsc,enum pipe_format format)141 valid_ubwc_format_cast(struct fd_resource *rsc, enum pipe_format format)
142 {
143    const struct fd_dev_info *info = fd_screen(rsc->b.b.screen)->info;
144    enum pipe_format orig_format = rsc->b.b.format;
145 
146    assert(rsc->layout.ubwc);
147 
148    /* Special case "casting" format in hw: */
149    if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8)
150       return true;
151 
152    /* If we support z24s8 ubwc then allow casts between the various
153     * permutations of z24s8:
154     */
155    if (info->a6xx.has_z24uint_s8uint && is_z24s8(format) && is_z24s8(orig_format))
156       return true;
157 
158    enum fd6_ubwc_compat_type type = fd6_ubwc_compat_mode(info, orig_format);
159    if (type == FD6_UBWC_UNKNOWN_COMPAT)
160       return false;
161 
162    return fd6_ubwc_compat_mode(info, format) == type;
163 }
164 
165 /**
166  * R8G8 have a different block width/height and height alignment from other
167  * formats that would normally be compatible (like R16), and so if we are
168  * trying to, for example, sample R16 as R8G8 we need to demote to linear.
169  */
170 static bool
is_r8g8(enum pipe_format format)171 is_r8g8(enum pipe_format format)
172 {
173    return (util_format_get_blocksize(format) == 2) &&
174          (util_format_get_nr_components(format) == 2);
175 }
176 
177 /**
178  * Can a rsc as it is currently laid out be accessed as the specified format.
179  * Returns whether the access is ok or whether the rsc needs to be demoted
180  * to uncompressed tiled or linear.
181  */
182 enum fd6_format_status
fd6_check_valid_format(struct fd_resource * rsc,enum pipe_format format)183 fd6_check_valid_format(struct fd_resource *rsc, enum pipe_format format)
184 {
185    enum pipe_format orig_format = rsc->b.b.format;
186 
187    if (orig_format == format)
188       return FORMAT_OK;
189 
190    if (rsc->layout.tile_mode && (is_r8g8(orig_format) != is_r8g8(format)))
191       return DEMOTE_TO_LINEAR;
192 
193    if (!rsc->layout.ubwc)
194       return FORMAT_OK;
195 
196    if (ok_ubwc_format(rsc->b.b.screen, format, rsc->b.b.nr_samples) &&
197        valid_ubwc_format_cast(rsc, format))
198       return FORMAT_OK;
199 
200    return DEMOTE_TO_TILED;
201 }
202 
203 /**
204  * Ensure the rsc is in an ok state to be used with the specified format.
205  * This handles the case of UBWC buffers used with non-UBWC compatible
206  * formats, by triggering an uncompress.
207  */
208 void
fd6_validate_format(struct fd_context * ctx,struct fd_resource * rsc,enum pipe_format format)209 fd6_validate_format(struct fd_context *ctx, struct fd_resource *rsc,
210                     enum pipe_format format)
211 {
212    tc_assert_driver_thread(ctx->tc);
213 
214    switch (fd6_check_valid_format(rsc, format)) {
215    case FORMAT_OK:
216       return;
217    case DEMOTE_TO_LINEAR:
218       perf_debug_ctx(ctx,
219                      "%" PRSC_FMT ": demoted to linear+uncompressed due to use as %s",
220                      PRSC_ARGS(&rsc->b.b), util_format_short_name(format));
221 
222       fd_resource_uncompress(ctx, rsc, true);
223       return;
224    case DEMOTE_TO_TILED:
225       perf_debug_ctx(ctx,
226                      "%" PRSC_FMT ": demoted to uncompressed due to use as %s",
227                      PRSC_ARGS(&rsc->b.b), util_format_short_name(format));
228 
229       fd_resource_uncompress(ctx, rsc, false);
230       return;
231    }
232 }
233 
234 template <chip CHIP>
235 static void
setup_lrz(struct fd_resource * rsc)236 setup_lrz(struct fd_resource *rsc)
237 {
238    struct fd_screen *screen = fd_screen(rsc->b.b.screen);
239    struct fdl_layout *layout = &rsc->layout;
240    unsigned width = layout->width0;
241    unsigned height = layout->height0;
242 
243    /* LRZ buffer is super-sampled: */
244    switch (layout->nr_samples) {
245    case 4:
246       width *= 2;
247       FALLTHROUGH;
248    case 2:
249       height *= 2;
250    }
251 
252    unsigned lrz_pitch = align(DIV_ROUND_UP(width, 8), 32);
253    unsigned lrz_height = align(DIV_ROUND_UP(height, 8), 32);
254 
255    rsc->lrz_height = lrz_height;
256    rsc->lrz_width = lrz_pitch;
257    rsc->lrz_pitch = lrz_pitch;
258 
259    unsigned lrz_size = lrz_pitch * lrz_height * sizeof(uint16_t);
260 
261    unsigned nblocksx = DIV_ROUND_UP(DIV_ROUND_UP(width, 8), 16);
262    unsigned nblocksy = DIV_ROUND_UP(DIV_ROUND_UP(height, 8), 4);
263 
264    /* Fast-clear buffer is 1bit/block */
265    unsigned lrz_fc_size = DIV_ROUND_UP(nblocksx * nblocksy, 8);
266 
267    /* Fast-clear buffer cannot be larger than 512 bytes on A6XX and 1024 bytes
268     * on A7XX (HW limitation)
269     */
270    bool has_lrz_fc = screen->info->a6xx.enable_lrz_fast_clear &&
271                      lrz_fc_size <= fd_lrzfc_layout<CHIP>::FC_SIZE &&
272                      !FD_DBG(NOLRZFC);
273 
274    /* Allocate a LRZ fast-clear buffer even if we aren't using FC, if the
275     * hw is re-using this buffer for direction tracking
276     */
277    if (has_lrz_fc || screen->info->a6xx.has_lrz_dir_tracking) {
278       rsc->lrz_fc_offset = lrz_size;
279       lrz_size += sizeof(fd_lrzfc_layout<CHIP>);
280    }
281 
282    rsc->lrz = fd_bo_new(screen->dev, lrz_size, FD_BO_NOMAP, "lrz");
283 }
284 
285 template <chip CHIP>
286 static uint32_t
fd6_setup_slices(struct fd_resource * rsc)287 fd6_setup_slices(struct fd_resource *rsc)
288 {
289    struct pipe_resource *prsc = &rsc->b.b;
290    struct fd_screen *screen = fd_screen(prsc->screen);
291 
292    if (rsc->layout.ubwc && !ok_ubwc_format(prsc->screen, prsc->format, prsc->nr_samples))
293       rsc->layout.ubwc = false;
294 
295    fdl6_layout(&rsc->layout, screen->info, prsc->format, fd_resource_nr_samples(prsc),
296                prsc->width0, prsc->height0, prsc->depth0, prsc->last_level + 1,
297                prsc->array_size, prsc->target == PIPE_TEXTURE_3D, false, NULL);
298 
299    if (!FD_DBG(NOLRZ) && has_depth(prsc->format) && !is_z32(prsc->format))
300       setup_lrz<CHIP>(rsc);
301 
302    return rsc->layout.size;
303 }
304 
305 static int
fill_ubwc_buffer_sizes(struct fd_resource * rsc)306 fill_ubwc_buffer_sizes(struct fd_resource *rsc)
307 {
308    struct pipe_resource *prsc = &rsc->b.b;
309    struct fd_screen *screen = fd_screen(prsc->screen);
310    struct fdl_explicit_layout l = {
311       .offset = rsc->layout.slices[0].offset,
312       .pitch = rsc->layout.pitch0,
313    };
314 
315    if (!can_do_ubwc(prsc))
316       return -1;
317 
318    rsc->layout.ubwc = true;
319    rsc->layout.tile_mode = TILE6_3;
320 
321    if (!fdl6_layout(&rsc->layout, screen->info, prsc->format, fd_resource_nr_samples(prsc),
322                     prsc->width0, prsc->height0, prsc->depth0,
323                     prsc->last_level + 1, prsc->array_size, false, false, &l))
324       return -1;
325 
326    if (rsc->layout.size > fd_bo_size(rsc->bo))
327       return -1;
328 
329    return 0;
330 }
331 
332 static int
fd6_layout_resource_for_modifier(struct fd_resource * rsc,uint64_t modifier)333 fd6_layout_resource_for_modifier(struct fd_resource *rsc, uint64_t modifier)
334 {
335    switch (modifier) {
336    case DRM_FORMAT_MOD_QCOM_COMPRESSED:
337       return fill_ubwc_buffer_sizes(rsc);
338    case DRM_FORMAT_MOD_LINEAR:
339       if (can_do_ubwc(&rsc->b.b)) {
340          perf_debug("%" PRSC_FMT
341                     ": not UBWC: imported with DRM_FORMAT_MOD_LINEAR!",
342                     PRSC_ARGS(&rsc->b.b));
343       }
344       return 0;
345    case DRM_FORMAT_MOD_QCOM_TILED3:
346       rsc->layout.tile_mode = fd6_tile_mode(&rsc->b.b);
347       FALLTHROUGH;
348    case DRM_FORMAT_MOD_INVALID:
349       /* For now, without buffer metadata, we must assume that buffers
350        * imported with INVALID modifier are linear
351        */
352       if (can_do_ubwc(&rsc->b.b)) {
353          perf_debug("%" PRSC_FMT
354                     ": not UBWC: imported with DRM_FORMAT_MOD_INVALID!",
355                     PRSC_ARGS(&rsc->b.b));
356       }
357       return 0;
358    default:
359       return -1;
360    }
361 }
362 
363 static bool
fd6_is_format_supported(struct pipe_screen * pscreen,enum pipe_format fmt,uint64_t modifier)364 fd6_is_format_supported(struct pipe_screen *pscreen,
365                         enum pipe_format fmt,
366                         uint64_t modifier)
367 {
368    switch (modifier) {
369    case DRM_FORMAT_MOD_LINEAR:
370       return true;
371    case DRM_FORMAT_MOD_QCOM_COMPRESSED:
372       /* screen->is_format_supported() is used only for dma-buf modifier queries,
373        * so no super-sampled images:
374        */
375       return ok_ubwc_format(pscreen, fmt, 0);
376    case DRM_FORMAT_MOD_QCOM_TILED3:
377       return fd6_tile_mode_for_format(fmt) == TILE6_3;
378    default:
379       return false;
380    }
381 }
382 
383 template <chip CHIP>
384 void
fd6_resource_screen_init(struct pipe_screen * pscreen)385 fd6_resource_screen_init(struct pipe_screen *pscreen)
386 {
387    struct fd_screen *screen = fd_screen(pscreen);
388 
389    screen->setup_slices = fd6_setup_slices<CHIP>;
390    screen->layout_resource_for_modifier = fd6_layout_resource_for_modifier;
391    screen->is_format_supported = fd6_is_format_supported;
392 }
393 FD_GENX(fd6_resource_screen_init);
394