• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2017 Rob Clark <robclark@freedesktop.org>
3  * Copyright © 2018 Google, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  * Authors:
25  *    Rob Clark <robclark@freedesktop.org>
26  */
27 
28 #define FD_BO_NO_HARDPIN 1
29 
30 #include "util/format_srgb.h"
31 #include "util/half_float.h"
32 #include "util/u_dump.h"
33 #include "util/u_log.h"
34 #include "util/u_surface.h"
35 
36 #include "freedreno_blitter.h"
37 #include "freedreno_fence.h"
38 #include "freedreno_resource.h"
39 #include "freedreno_tracepoints.h"
40 
41 #include "fd6_barrier.h"
42 #include "fd6_blitter.h"
43 #include "fd6_emit.h"
44 #include "fd6_pack.h"
45 #include "fd6_resource.h"
46 
47 static inline enum a6xx_2d_ifmt
fd6_ifmt(enum a6xx_format fmt)48 fd6_ifmt(enum a6xx_format fmt)
49 {
50    switch (fmt) {
51    case FMT6_A8_UNORM:
52    case FMT6_8_UNORM:
53    case FMT6_8_SNORM:
54    case FMT6_8_8_UNORM:
55    case FMT6_8_8_SNORM:
56    case FMT6_8_8_8_8_UNORM:
57    case FMT6_8_8_8_X8_UNORM:
58    case FMT6_8_8_8_8_SNORM:
59    case FMT6_4_4_4_4_UNORM:
60    case FMT6_5_5_5_1_UNORM:
61    case FMT6_5_6_5_UNORM:
62       return R2D_UNORM8;
63 
64    case FMT6_32_UINT:
65    case FMT6_32_SINT:
66    case FMT6_32_32_UINT:
67    case FMT6_32_32_SINT:
68    case FMT6_32_32_32_32_UINT:
69    case FMT6_32_32_32_32_SINT:
70       return R2D_INT32;
71 
72    case FMT6_16_UINT:
73    case FMT6_16_SINT:
74    case FMT6_16_16_UINT:
75    case FMT6_16_16_SINT:
76    case FMT6_16_16_16_16_UINT:
77    case FMT6_16_16_16_16_SINT:
78    case FMT6_10_10_10_2_UINT:
79       return R2D_INT16;
80 
81    case FMT6_8_UINT:
82    case FMT6_8_SINT:
83    case FMT6_8_8_UINT:
84    case FMT6_8_8_SINT:
85    case FMT6_8_8_8_8_UINT:
86    case FMT6_8_8_8_8_SINT:
87    case FMT6_Z24_UNORM_S8_UINT:
88    case FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
89       return R2D_INT8;
90 
91    case FMT6_16_UNORM:
92    case FMT6_16_SNORM:
93    case FMT6_16_16_UNORM:
94    case FMT6_16_16_SNORM:
95    case FMT6_16_16_16_16_UNORM:
96    case FMT6_16_16_16_16_SNORM:
97    case FMT6_32_FLOAT:
98    case FMT6_32_32_FLOAT:
99    case FMT6_32_32_32_32_FLOAT:
100       return R2D_FLOAT32;
101 
102    case FMT6_16_FLOAT:
103    case FMT6_16_16_FLOAT:
104    case FMT6_16_16_16_16_FLOAT:
105    case FMT6_11_11_10_FLOAT:
106    case FMT6_10_10_10_2_UNORM_DEST:
107       return R2D_FLOAT16;
108 
109    default:
110       unreachable("bad format");
111       return (enum a6xx_2d_ifmt)0;
112    }
113 }
114 
115 /* Make sure none of the requested dimensions extend beyond the size of the
116  * resource.  Not entirely sure why this happens, but sometimes it does, and
117  * w/ 2d blt doesn't have wrap modes like a sampler, so force those cases
118  * back to u_blitter
119  */
120 static bool
ok_dims(const struct pipe_resource * r,const struct pipe_box * b,int lvl)121 ok_dims(const struct pipe_resource *r, const struct pipe_box *b, int lvl)
122 {
123    int last_layer =
124       r->target == PIPE_TEXTURE_3D ? u_minify(r->depth0, lvl) : r->array_size;
125 
126    return (b->x >= 0) && (b->x + b->width <= u_minify(r->width0, lvl)) &&
127           (b->y >= 0) && (b->y + b->height <= u_minify(r->height0, lvl)) &&
128           (b->z >= 0) && (b->z + b->depth <= last_layer);
129 }
130 
131 static bool
ok_format(enum pipe_format pfmt)132 ok_format(enum pipe_format pfmt)
133 {
134    enum a6xx_format fmt = fd6_color_format(pfmt, TILE6_LINEAR);
135 
136    if (util_format_is_compressed(pfmt))
137       return true;
138 
139    switch (pfmt) {
140    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
141    case PIPE_FORMAT_Z24X8_UNORM:
142    case PIPE_FORMAT_Z16_UNORM:
143    case PIPE_FORMAT_Z32_UNORM:
144    case PIPE_FORMAT_Z32_FLOAT:
145    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
146    case PIPE_FORMAT_S8_UINT:
147       return true;
148    default:
149       break;
150    }
151 
152    if (fmt == FMT6_NONE)
153       return false;
154 
155    return true;
156 }
157 
/* Set to 1 to dump every blit handled by this file to stderr: */
#define DEBUG_BLIT          0
/* Set to 1 to report (on stderr) why a blit was rejected: */
#define DEBUG_BLIT_FALLBACK 0

/* Return false from the enclosing function when `cond` holds.
 *
 * Expects a `const struct pipe_blit_info *info` to be in scope at the point
 * of use, since the rejected blit is dumped when DEBUG_BLIT_FALLBACK is
 * enabled.
 */
#define fail_if(cond)                                                          \
   do {                                                                        \
      if (!(cond))                                                             \
         break;                                                                \
      if (DEBUG_BLIT_FALLBACK) {                                               \
         fprintf(stderr, "falling back: %s for blit:\n", #cond);               \
         dump_blit_info(info);                                                 \
      }                                                                        \
      return false;                                                            \
   } while (0)
171 
172 static bool
is_ubwc(struct pipe_resource * prsc,unsigned level)173 is_ubwc(struct pipe_resource *prsc, unsigned level)
174 {
175    return fd_resource_ubwc_enabled(fd_resource(prsc), level);
176 }
177 
178 static void
dump_blit_info(const struct pipe_blit_info * info)179 dump_blit_info(const struct pipe_blit_info *info)
180 {
181    util_dump_blit_info(stderr, info);
182    fprintf(stderr, "\ndst resource: ");
183    util_dump_resource(stderr, info->dst.resource);
184    if (is_ubwc(info->dst.resource, info->dst.level))
185       fprintf(stderr, " (ubwc)");
186    fprintf(stderr, "\nsrc resource: ");
187    util_dump_resource(stderr, info->src.resource);
188    if (is_ubwc(info->src.resource, info->src.level))
189       fprintf(stderr, " (ubwc)");
190    fprintf(stderr, "\n");
191 }
192 
193 static bool
can_do_blit(const struct pipe_blit_info * info)194 can_do_blit(const struct pipe_blit_info *info)
195 {
196    /* I think we can do scaling, but not in z dimension since that would
197     * require blending..
198     */
199    fail_if(info->dst.box.depth != info->src.box.depth);
200 
201    /* Fail if unsupported format: */
202    fail_if(!ok_format(info->src.format));
203    fail_if(!ok_format(info->dst.format));
204 
205    assert(!util_format_is_compressed(info->src.format));
206    assert(!util_format_is_compressed(info->dst.format));
207 
208    fail_if(!ok_dims(info->src.resource, &info->src.box, info->src.level));
209 
210    fail_if(!ok_dims(info->dst.resource, &info->dst.box, info->dst.level));
211 
212    assert(info->dst.box.width >= 0);
213    assert(info->dst.box.height >= 0);
214    assert(info->dst.box.depth >= 0);
215 
216    fail_if(info->dst.resource->nr_samples > 1);
217 
218    fail_if(info->window_rectangle_include);
219 
220    const struct util_format_description *src_desc =
221       util_format_description(info->src.format);
222    const struct util_format_description *dst_desc =
223       util_format_description(info->dst.format);
224    const int common_channels =
225       MIN2(src_desc->nr_channels, dst_desc->nr_channels);
226 
227    if (info->mask & PIPE_MASK_RGBA) {
228       for (int i = 0; i < common_channels; i++) {
229          fail_if(memcmp(&src_desc->channel[i], &dst_desc->channel[i],
230                         sizeof(src_desc->channel[0])));
231       }
232    }
233 
234    fail_if(info->alpha_blend);
235 
236    return true;
237 }
238 
239 static bool
can_do_clear(const struct pipe_resource * prsc,unsigned level,const struct pipe_box * box)240 can_do_clear(const struct pipe_resource *prsc, unsigned level,
241              const struct pipe_box *box)
242 {
243    return ok_format(prsc->format) &&
244           ok_dims(prsc, box, level) &&
245           (fd_resource_nr_samples(prsc) == 1);
246 
247    return true;
248 }
249 
250 static void
emit_setup(struct fd_batch * batch)251 emit_setup(struct fd_batch *batch)
252 {
253    struct fd_ringbuffer *ring = batch->draw;
254    struct fd_screen *screen = batch->ctx->screen;
255 
256    fd6_emit_flushes(batch->ctx, ring,
257                     FD6_FLUSH_CCU_COLOR |
258                     FD6_INVALIDATE_CCU_COLOR |
259                     FD6_FLUSH_CCU_DEPTH |
260                     FD6_INVALIDATE_CCU_DEPTH);
261 
262    /* normal BLIT_OP_SCALE operation needs bypass RB_CCU_CNTL */
263    OUT_WFI5(ring);
264    fd6_emit_ccu_cntl(ring, screen, false);
265 }
266 
267 template <chip CHIP>
268 static void
emit_blit_setup(struct fd_ringbuffer * ring,enum pipe_format pfmt,bool scissor_enable,union pipe_color_union * color,uint32_t unknown_8c01,enum a6xx_rotation rotate)269 emit_blit_setup(struct fd_ringbuffer *ring, enum pipe_format pfmt,
270                 bool scissor_enable, union pipe_color_union *color,
271                 uint32_t unknown_8c01, enum a6xx_rotation rotate)
272 {
273    enum a6xx_format fmt = fd6_color_format(pfmt, TILE6_LINEAR);
274    bool is_srgb = util_format_is_srgb(pfmt);
275    enum a6xx_2d_ifmt ifmt = fd6_ifmt(fmt);
276 
277    if (is_srgb) {
278       assert(ifmt == R2D_UNORM8);
279       ifmt = R2D_UNORM8_SRGB;
280    }
281 
282    uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL_MASK(0xf) |
283                         A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt) |
284                         A6XX_RB_2D_BLIT_CNTL_IFMT(ifmt) |
285                         A6XX_RB_2D_BLIT_CNTL_ROTATE(rotate) |
286                         COND(color, A6XX_RB_2D_BLIT_CNTL_SOLID_COLOR) |
287                         COND(scissor_enable, A6XX_RB_2D_BLIT_CNTL_SCISSOR);
288 
289    OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1);
290    OUT_RING(ring, blit_cntl);
291 
292    OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
293    OUT_RING(ring, blit_cntl);
294 
295    if (fmt == FMT6_10_10_10_2_UNORM_DEST)
296       fmt = FMT6_16_16_16_16_FLOAT;
297 
298    /* This register is probably badly named... it seems that it's
299     * controlling the internal/accumulator format or something like
300     * that. It's certainly not tied to only the src format.
301     */
302    OUT_REG(ring, SP_2D_DST_FORMAT(
303          CHIP,
304          .sint = util_format_is_pure_sint(pfmt),
305          .uint = util_format_is_pure_uint(pfmt),
306          .color_format = fmt,
307          .srgb = is_srgb,
308          .mask = 0xf,
309    ));
310 
311    OUT_PKT4(ring, REG_A6XX_RB_2D_UNKNOWN_8C01, 1);
312    OUT_RING(ring, unknown_8c01);
313 }
314 
315 static void
emit_blit_buffer_dst(struct fd_ringbuffer * ring,struct fd_resource * dst,unsigned off,unsigned size)316 emit_blit_buffer_dst(struct fd_ringbuffer *ring, struct fd_resource *dst,
317                      unsigned off, unsigned size)
318 {
319    OUT_REG(ring,
320            A6XX_RB_2D_DST_INFO(
321                  .color_format = FMT6_8_UNORM,
322                  .tile_mode = TILE6_LINEAR,
323                  .color_swap = WZYX,
324            ),
325            A6XX_RB_2D_DST(
326                  .bo = dst->bo,
327                  .bo_offset = off,
328            ),
329            A6XX_RB_2D_DST_PITCH(size),
330    );
331 }
332 
333 /* buffers need to be handled specially since x/width can exceed the bounds
334  * supported by hw.. if necessary decompose into (potentially) two 2D blits
335  */
336 template <chip CHIP>
337 static void
emit_blit_buffer(struct fd_context * ctx,struct fd_ringbuffer * ring,const struct pipe_blit_info * info)338 emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring,
339                  const struct pipe_blit_info *info)
340 {
341    const struct pipe_box *sbox = &info->src.box;
342    const struct pipe_box *dbox = &info->dst.box;
343    struct fd_resource *src, *dst;
344    unsigned sshift, dshift;
345 
346    if (DEBUG_BLIT) {
347       fprintf(stderr, "buffer blit: ");
348       dump_blit_info(info);
349    }
350 
351    src = fd_resource(info->src.resource);
352    dst = fd_resource(info->dst.resource);
353 
354    assert(src->layout.cpp == 1);
355    assert(dst->layout.cpp == 1);
356    assert(info->src.resource->format == info->dst.resource->format);
357    assert((sbox->y == 0) && (sbox->height == 1));
358    assert((dbox->y == 0) && (dbox->height == 1));
359    assert((sbox->z == 0) && (sbox->depth == 1));
360    assert((dbox->z == 0) && (dbox->depth == 1));
361    assert(sbox->width == dbox->width);
362    assert(info->src.level == 0);
363    assert(info->dst.level == 0);
364 
365    /*
366     * Buffers can have dimensions bigger than max width, remap into
367     * multiple 1d blits to fit within max dimension
368     *
369     * Note that blob uses .ARRAY_PITCH=128 for blitting buffers, which
370     * seems to prevent overfetch related faults.  Not quite sure what
371     * the deal is there.
372     *
373     * Low 6 bits of SRC/DST addresses need to be zero (ie. address
374     * aligned to 64) so we need to shift src/dst x1/x2 to make up the
375     * difference.  On top of already splitting up the blit so width
376     * isn't > 16k.
377     *
378     * We perhaps could do a bit better, if src and dst are aligned but
379     * in the worst case this means we have to split the copy up into
380     * 16k (0x4000) minus 64 (0x40).
381     */
382 
383    sshift = sbox->x & 0x3f;
384    dshift = dbox->x & 0x3f;
385 
386    emit_blit_setup<CHIP>(ring, PIPE_FORMAT_R8_UNORM, false, NULL, 0, ROTATE_0);
387 
388    for (unsigned off = 0; off < sbox->width; off += (0x4000 - 0x40)) {
389       unsigned soff, doff, w, p;
390 
391       soff = (sbox->x + off) & ~0x3f;
392       doff = (dbox->x + off) & ~0x3f;
393 
394       w = MIN2(sbox->width - off, (0x4000 - 0x40));
395       p = align(w, 64);
396 
397       assert((soff + w) <= fd_bo_size(src->bo));
398       assert((doff + w) <= fd_bo_size(dst->bo));
399 
400       /*
401        * Emit source:
402        */
403       OUT_REG(ring,
404               SP_PS_2D_SRC_INFO(
405                     CHIP,
406                     .color_format = FMT6_8_UNORM,
407                     .tile_mode = TILE6_LINEAR,
408                     .color_swap = WZYX,
409                     .unk20 = true,
410                     .unk22 = true,
411               ),
412               SP_PS_2D_SRC_SIZE(
413                     CHIP,
414                     .width = sshift + w,
415                     .height = 1,
416               ),
417               SP_PS_2D_SRC(
418                     CHIP,
419                     .bo = src->bo,
420                     .bo_offset = soff,
421               ),
422               SP_PS_2D_SRC_PITCH(
423                     CHIP,
424                     .pitch = p,
425               ),
426       );
427 
428       /*
429        * Emit destination:
430        */
431       emit_blit_buffer_dst(ring, dst, doff, p);
432 
433       /*
434        * Blit command:
435        */
436       OUT_REG(ring,
437               A6XX_GRAS_2D_SRC_TL_X(sshift),
438               A6XX_GRAS_2D_SRC_BR_X(sshift + w - 1),
439               A6XX_GRAS_2D_SRC_TL_Y(0),
440               A6XX_GRAS_2D_SRC_BR_Y(0),
441       );
442 
443       OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
444       OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(dshift) | A6XX_GRAS_2D_DST_TL_Y(0));
445       OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(dshift + w - 1) |
446                         A6XX_GRAS_2D_DST_BR_Y(0));
447 
448       OUT_PKT7(ring, CP_EVENT_WRITE, 1);
449       OUT_RING(ring, LABEL);
450       OUT_WFI5(ring);
451 
452       OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
453       OUT_RING(ring, ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL_blit);
454 
455       OUT_PKT7(ring, CP_BLIT, 1);
456       OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
457 
458       OUT_WFI5(ring);
459 
460       OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
461       OUT_RING(ring, 0); /* RB_DBG_ECO_CNTL */
462    }
463 }
464 
465 template <chip CHIP>
466 static void
fd6_clear_ubwc(struct fd_batch * batch,struct fd_resource * rsc)467 fd6_clear_ubwc(struct fd_batch *batch, struct fd_resource *rsc) assert_dt
468 {
469    struct fd_ringbuffer *ring = fd_batch_get_prologue(batch);
470    union pipe_color_union color = {};
471 
472    emit_blit_setup<CHIP>(ring, PIPE_FORMAT_R8_UNORM, false, &color, 0, ROTATE_0);
473 
474    OUT_REG(ring,
475            SP_PS_2D_SRC_INFO(CHIP),
476            SP_PS_2D_SRC_SIZE(CHIP),
477            SP_PS_2D_SRC(CHIP),
478            SP_PS_2D_SRC_PITCH(CHIP),
479    );
480 
481    OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
482    OUT_RING(ring, 0x00000000);
483    OUT_RING(ring, 0x00000000);
484    OUT_RING(ring, 0x00000000);
485    OUT_RING(ring, 0x00000000);
486 
487    OUT_REG(ring,
488            A6XX_GRAS_2D_SRC_TL_X(0),
489            A6XX_GRAS_2D_SRC_BR_X(0),
490            A6XX_GRAS_2D_SRC_TL_Y(0),
491            A6XX_GRAS_2D_SRC_BR_Y(0),
492    );
493 
494    unsigned size = rsc->layout.slices[0].offset;
495    unsigned offset = 0;
496 
497    /* We could be more clever here and realize that we could use a
498     * larger width if the size is aligned to something more than a
499     * single page.. or even use a format larger than r8 in those
500     * cases. But for normal sized textures and even up to 16k x 16k
501     * at <= 4byte/pixel, we'll only go thru the loop once
502     */
503    const unsigned w = 0x1000;
504 
505    /* ubwc size should always be page aligned: */
506    assert((size % w) == 0);
507 
508    while (size > 0) {
509       const unsigned h = MIN2(0x4000, size / w);
510       /* width is already aligned to a suitable pitch: */
511       const unsigned p = w;
512 
513       /*
514        * Emit destination:
515        */
516       emit_blit_buffer_dst(ring, rsc, offset, p);
517 
518       /*
519        * Blit command:
520        */
521 
522       OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
523       OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
524       OUT_RING(ring,
525                A6XX_GRAS_2D_DST_BR_X(w - 1) | A6XX_GRAS_2D_DST_BR_Y(h - 1));
526 
527       OUT_PKT7(ring, CP_EVENT_WRITE, 1);
528       OUT_RING(ring, LABEL);
529       OUT_WFI5(ring);
530 
531       OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
532       OUT_RING(ring, batch->ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL_blit);
533 
534       OUT_PKT7(ring, CP_BLIT, 1);
535       OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
536 
537       OUT_WFI5(ring);
538 
539       OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
540       OUT_RING(ring, 0); /* RB_DBG_ECO_CNTL */
541 
542       offset += w * h;
543       size -= w * h;
544    }
545 
546    fd6_emit_flushes(batch->ctx, ring,
547                     FD6_FLUSH_CCU_COLOR |
548                     FD6_FLUSH_CCU_DEPTH |
549                     FD6_FLUSH_CACHE |
550                     FD6_WAIT_FOR_IDLE);
551 }
552 
553 static void
emit_blit_dst(struct fd_ringbuffer * ring,struct pipe_resource * prsc,enum pipe_format pfmt,unsigned level,unsigned layer)554 emit_blit_dst(struct fd_ringbuffer *ring, struct pipe_resource *prsc,
555               enum pipe_format pfmt, unsigned level, unsigned layer)
556 {
557    struct fd_resource *dst = fd_resource(prsc);
558    enum a6xx_format fmt =
559          fd6_color_format(pfmt, (enum a6xx_tile_mode)dst->layout.tile_mode);
560    enum a6xx_tile_mode tile =
561          (enum a6xx_tile_mode)fd_resource_tile_mode(prsc, level);
562    enum a3xx_color_swap swap =
563          fd6_color_swap(pfmt, (enum a6xx_tile_mode)dst->layout.tile_mode);
564    uint32_t pitch = fd_resource_pitch(dst, level);
565    bool ubwc_enabled = fd_resource_ubwc_enabled(dst, level);
566    unsigned off = fd_resource_offset(dst, level, layer);
567 
568    if (fmt == FMT6_Z24_UNORM_S8_UINT)
569       fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
570 
571    OUT_REG(ring,
572            A6XX_RB_2D_DST_INFO(
573                  .color_format = fmt,
574                  .tile_mode = tile,
575                  .color_swap = swap,
576                  .flags = ubwc_enabled,
577                  .srgb = util_format_is_srgb(pfmt),
578            ),
579            A6XX_RB_2D_DST(
580                  .bo = dst->bo,
581                  .bo_offset = off,
582            ),
583            A6XX_RB_2D_DST_PITCH(pitch),
584    );
585 
586    if (ubwc_enabled) {
587       OUT_PKT4(ring, REG_A6XX_RB_2D_DST_FLAGS, 6);
588       fd6_emit_flag_reference(ring, dst, level, layer);
589       OUT_RING(ring, 0x00000000);
590       OUT_RING(ring, 0x00000000);
591       OUT_RING(ring, 0x00000000);
592    }
593 }
594 
595 template <chip CHIP>
596 static void
emit_blit_src(struct fd_ringbuffer * ring,const struct pipe_blit_info * info,unsigned layer,unsigned nr_samples)597 emit_blit_src(struct fd_ringbuffer *ring, const struct pipe_blit_info *info,
598               unsigned layer, unsigned nr_samples)
599 {
600    struct fd_resource *src = fd_resource(info->src.resource);
601    enum a6xx_format sfmt =
602       fd6_texture_format(info->src.format, (enum a6xx_tile_mode)src->layout.tile_mode);
603    enum a6xx_tile_mode stile =
604       (enum a6xx_tile_mode)fd_resource_tile_mode(info->src.resource, info->src.level);
605    enum a3xx_color_swap sswap =
606       fd6_texture_swap(info->src.format, (enum a6xx_tile_mode)src->layout.tile_mode);
607    uint32_t pitch = fd_resource_pitch(src, info->src.level);
608    bool subwc_enabled = fd_resource_ubwc_enabled(src, info->src.level);
609    unsigned soff = fd_resource_offset(src, info->src.level, layer);
610    uint32_t width = u_minify(src->b.b.width0, info->src.level) * nr_samples;
611    uint32_t height = u_minify(src->b.b.height0, info->src.level);
612    enum a3xx_msaa_samples samples = fd_msaa_samples(src->b.b.nr_samples);
613 
614    if (info->src.format == PIPE_FORMAT_A8_UNORM)
615       sfmt = FMT6_A8_UNORM;
616 
617    OUT_REG(ring,
618            SP_PS_2D_SRC_INFO(
619                  CHIP,
620                  .color_format = sfmt,
621                  .tile_mode = stile,
622                  .color_swap = sswap,
623                  .flags = subwc_enabled,
624                  .srgb  = util_format_is_srgb(info->src.format),
625                  .samples = samples,
626                  .filter = (info->filter == PIPE_TEX_FILTER_LINEAR),
627                  .samples_average = (samples > MSAA_ONE) && !info->sample0_only,
628                  .unk20 = true,
629                  .unk22 = true,
630            ),
631            SP_PS_2D_SRC_SIZE(
632                  CHIP,
633                  .width = width,
634                  .height = height,
635            ),
636            SP_PS_2D_SRC(
637                  CHIP,
638                  .bo = src->bo,
639                  .bo_offset = soff,
640            ),
641            SP_PS_2D_SRC_PITCH(
642                  CHIP,
643                  .pitch = pitch,
644            ),
645    );
646 
647    if (subwc_enabled && fd_resource_ubwc_enabled(src, info->src.level)) {
648       OUT_REG(ring,
649               SP_PS_2D_SRC_FLAGS(
650                     CHIP,
651                     .bo = src->bo,
652                     .bo_offset = fd_resource_ubwc_offset(src, info->src.level, layer),
653               ),
654               SP_PS_2D_SRC_FLAGS_PITCH(
655                     CHIP, fdl_ubwc_pitch(&src->layout, info->src.level)),
656       );
657    }
658 }
659 
660 template <chip CHIP>
661 static void
emit_blit_texture(struct fd_context * ctx,struct fd_ringbuffer * ring,const struct pipe_blit_info * info)662 emit_blit_texture(struct fd_context *ctx, struct fd_ringbuffer *ring,
663                   const struct pipe_blit_info *info)
664 {
665    const struct pipe_box *sbox = &info->src.box;
666    const struct pipe_box *dbox = &info->dst.box;
667    struct fd_resource *dst;
668    int sx1, sy1, sx2, sy2;
669    int dx1, dy1, dx2, dy2;
670 
671    if (DEBUG_BLIT) {
672       fprintf(stderr, "texture blit: ");
673       dump_blit_info(info);
674    }
675 
676    dst = fd_resource(info->dst.resource);
677 
678    uint32_t nr_samples = fd_resource_nr_samples(&dst->b.b);
679 
680    sx1 = sbox->x * nr_samples;
681    sy1 = sbox->y;
682    sx2 = (sbox->x + sbox->width) * nr_samples;
683    sy2 = sbox->y + sbox->height;
684 
685    dx1 = dbox->x * nr_samples;
686    dy1 = dbox->y;
687    dx2 = (dbox->x + dbox->width) * nr_samples;
688    dy2 = dbox->y + dbox->height;
689 
690    static const enum a6xx_rotation rotates[2][2] = {
691       {ROTATE_0, ROTATE_HFLIP},
692       {ROTATE_VFLIP, ROTATE_180},
693    };
694    bool mirror_x = (sx2 < sx1) != (dx2 < dx1);
695    bool mirror_y = (sy2 < sy1) != (dy2 < dy1);
696 
697    enum a6xx_rotation rotate = rotates[mirror_y][mirror_x];
698 
699    OUT_REG(ring,
700            A6XX_GRAS_2D_SRC_TL_X(MIN2(sx1, sx2)),
701            A6XX_GRAS_2D_SRC_BR_X(MAX2(sx1, sx2) - 1),
702            A6XX_GRAS_2D_SRC_TL_Y(MIN2(sy1, sy2)),
703            A6XX_GRAS_2D_SRC_BR_Y(MAX2(sy1, sy2) - 1),
704    );
705 
706    OUT_REG(ring,
707            A6XX_GRAS_2D_DST_TL(.x = MIN2(dx1, dx2),
708                                .y = MIN2(dy1, dy2)),
709            A6XX_GRAS_2D_DST_BR(.x = MAX2(dx1, dx2) - 1,
710                                .y = MAX2(dy1, dy2) - 1),
711    );
712 
713    if (info->scissor_enable) {
714       OUT_PKT4(ring, REG_A6XX_GRAS_2D_RESOLVE_CNTL_1, 2);
715       OUT_RING(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1_X(info->scissor.minx) |
716                         A6XX_GRAS_2D_RESOLVE_CNTL_1_Y(info->scissor.miny));
717       OUT_RING(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1_X(info->scissor.maxx - 1) |
718                         A6XX_GRAS_2D_RESOLVE_CNTL_1_Y(info->scissor.maxy - 1));
719    }
720 
721    emit_blit_setup<CHIP>(ring, info->dst.format, info->scissor_enable, NULL, 0, rotate);
722 
723    for (unsigned i = 0; i < info->dst.box.depth; i++) {
724 
725       emit_blit_src<CHIP>(ring, info, sbox->z + i, nr_samples);
726       emit_blit_dst(ring, info->dst.resource, info->dst.format, info->dst.level,
727                     dbox->z + i);
728 
729       /*
730        * Blit command:
731        */
732       OUT_PKT7(ring, CP_EVENT_WRITE, 1);
733       OUT_RING(ring, LABEL);
734       OUT_WFI5(ring);
735 
736       OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
737       OUT_RING(ring, ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL_blit);
738 
739       OUT_PKT7(ring, CP_BLIT, 1);
740       OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
741 
742       OUT_WFI5(ring);
743 
744       OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
745       OUT_RING(ring, 0); /* RB_DBG_ECO_CNTL */
746    }
747 }
748 
749 static void
emit_clear_color(struct fd_ringbuffer * ring,enum pipe_format pfmt,union pipe_color_union * color)750 emit_clear_color(struct fd_ringbuffer *ring, enum pipe_format pfmt,
751                  union pipe_color_union *color)
752 {
753    switch (pfmt) {
754    case PIPE_FORMAT_Z24X8_UNORM:
755    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
756    case PIPE_FORMAT_X24S8_UINT: {
757       uint32_t depth_unorm24 = color->f[0] * ((1u << 24) - 1);
758       uint8_t stencil = color->ui[1];
759       color->ui[0] = depth_unorm24 & 0xff;
760       color->ui[1] = (depth_unorm24 >> 8) & 0xff;
761       color->ui[2] = (depth_unorm24 >> 16) & 0xff;
762       color->ui[3] = stencil;
763       break;
764    }
765    default:
766       break;
767    }
768 
769    OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
770    switch (fd6_ifmt(fd6_color_format(pfmt, TILE6_LINEAR))) {
771    case R2D_UNORM8:
772    case R2D_UNORM8_SRGB:
773       /* The r2d ifmt is badly named, it also covers the signed case: */
774       if (util_format_is_snorm(pfmt)) {
775          OUT_RING(ring, float_to_byte_tex(color->f[0]));
776          OUT_RING(ring, float_to_byte_tex(color->f[1]));
777          OUT_RING(ring, float_to_byte_tex(color->f[2]));
778          OUT_RING(ring, float_to_byte_tex(color->f[3]));
779       } else {
780          OUT_RING(ring, float_to_ubyte(color->f[0]));
781          OUT_RING(ring, float_to_ubyte(color->f[1]));
782          OUT_RING(ring, float_to_ubyte(color->f[2]));
783          OUT_RING(ring, float_to_ubyte(color->f[3]));
784       }
785       break;
786    case R2D_FLOAT16:
787       OUT_RING(ring, _mesa_float_to_half(color->f[0]));
788       OUT_RING(ring, _mesa_float_to_half(color->f[1]));
789       OUT_RING(ring, _mesa_float_to_half(color->f[2]));
790       OUT_RING(ring, _mesa_float_to_half(color->f[3]));
791       break;
792    case R2D_FLOAT32:
793    case R2D_INT32:
794    case R2D_INT16:
795    case R2D_INT8:
796    default:
797       OUT_RING(ring, color->ui[0]);
798       OUT_RING(ring, color->ui[1]);
799       OUT_RING(ring, color->ui[2]);
800       OUT_RING(ring, color->ui[3]);
801       break;
802    }
803 }
804 
805 template <chip CHIP>
806 void
fd6_clear_lrz(struct fd_batch * batch,struct fd_resource * zsbuf,struct fd_bo * lrz,double depth)807 fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf,
808               struct fd_bo *lrz, double depth)
809 {
810    struct fd_ringbuffer *ring = fd_batch_get_prologue(batch);
811 
812    if (DEBUG_BLIT) {
813       fprintf(stderr, "lrz clear:\ndst resource: ");
814       util_dump_resource(stderr, &zsbuf->b.b);
815       fprintf(stderr, "\n");
816    }
817 
818    OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
819    OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
820    OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(zsbuf->lrz_width - 1) |
821                      A6XX_GRAS_2D_DST_BR_Y(zsbuf->lrz_height - 1));
822 
823    union pipe_color_union clear_color = { .f = {depth} };
824 
825    emit_clear_color(ring, PIPE_FORMAT_Z16_UNORM, &clear_color);
826    emit_blit_setup<CHIP>(ring, PIPE_FORMAT_Z16_UNORM, false, &clear_color, 0, ROTATE_0);
827 
828    OUT_REG(ring,
829            A6XX_RB_2D_DST_INFO(
830                  .color_format = FMT6_16_UNORM,
831                  .tile_mode = TILE6_LINEAR,
832                  .color_swap = WZYX,
833            ),
834            A6XX_RB_2D_DST(
835                  .bo = lrz,
836            ),
837            A6XX_RB_2D_DST_PITCH(zsbuf->lrz_pitch * 2),
838    );
839 
840    /*
841     * Blit command:
842     */
843 
844    OUT_PKT7(ring, CP_BLIT, 1);
845    OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
846 }
847 
848 template void fd6_clear_lrz<A6XX>(struct fd_batch *batch, struct fd_resource *zsbuf, struct fd_bo *lrz, double depth);
849 template void fd6_clear_lrz<A7XX>(struct fd_batch *batch, struct fd_resource *zsbuf, struct fd_bo *lrz, double depth);
850 
851 /**
852  * Handle conversion of clear color
853  */
854 static union pipe_color_union
convert_color(enum pipe_format format,union pipe_color_union * pcolor)855 convert_color(enum pipe_format format, union pipe_color_union *pcolor)
856 {
857    union pipe_color_union color = *pcolor;
858 
859    /* For solid-fill blits, the hw isn't going to convert from
860     * linear to srgb for us:
861     */
862    if (util_format_is_srgb(format)) {
863       for (int i = 0; i < 3; i++)
864          color.f[i] = util_format_linear_to_srgb_float(color.f[i]);
865    }
866 
867    if (util_format_is_snorm(format)) {
868       for (int i = 0; i < 3; i++)
869          color.f[i] = CLAMP(color.f[i], -1.0f, 1.0f);
870    }
871 
872    /* Note that float_to_ubyte() already clamps, for the unorm case */
873 
874    return color;
875 }
876 
877 template <chip CHIP>
878 void
fd6_clear_surface(struct fd_context * ctx,struct fd_ringbuffer * ring,struct pipe_surface * psurf,const struct pipe_box * box2d,union pipe_color_union * color,uint32_t unknown_8c01)879 fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring,
880                   struct pipe_surface *psurf, const struct pipe_box *box2d,
881                   union pipe_color_union *color, uint32_t unknown_8c01)
882 {
883    if (DEBUG_BLIT) {
884       fprintf(stderr, "surface clear:\ndst resource: ");
885       util_dump_resource(stderr, psurf->texture);
886       fprintf(stderr, "\n");
887    }
888 
889    uint32_t nr_samples = fd_resource_nr_samples(psurf->texture);
890    OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
891    OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(box2d->x * nr_samples) |
892                      A6XX_GRAS_2D_DST_TL_Y(box2d->y));
893    OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X((box2d->x + box2d->width) * nr_samples - 1) |
894                      A6XX_GRAS_2D_DST_BR_Y(box2d->y + box2d->height - 1));
895 
896    union pipe_color_union clear_color = convert_color(psurf->format, color);
897 
898    emit_clear_color(ring, psurf->format, &clear_color);
899    emit_blit_setup<CHIP>(ring, psurf->format, false, &clear_color, unknown_8c01, ROTATE_0);
900 
901    for (unsigned i = psurf->u.tex.first_layer; i <= psurf->u.tex.last_layer;
902         i++) {
903       emit_blit_dst(ring, psurf->texture, psurf->format, psurf->u.tex.level, i);
904 
905       /*
906        * Blit command:
907        */
908       OUT_PKT7(ring, CP_EVENT_WRITE, 1);
909       OUT_RING(ring, LABEL);
910       OUT_WFI5(ring);
911 
912       OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
913       OUT_RING(ring, ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL_blit);
914 
915       OUT_PKT7(ring, CP_BLIT, 1);
916       OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
917 
918       OUT_WFI5(ring);
919 
920       OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
921       OUT_RING(ring, 0); /* RB_DBG_ECO_CNTL */
922    }
923 }
924 
925 template void fd6_clear_surface<A6XX>(struct fd_context *ctx, struct fd_ringbuffer *ring,
926                                       struct pipe_surface *psurf, const struct pipe_box *box2d,
927                                       union pipe_color_union *color, uint32_t unknown_8c01);
928 template void fd6_clear_surface<A7XX>(struct fd_context *ctx, struct fd_ringbuffer *ring,
929                                       struct pipe_surface *psurf, const struct pipe_box *box2d,
930                                       union pipe_color_union *color, uint32_t unknown_8c01);
931 
932 template <chip CHIP>
933 static void
fd6_clear_texture(struct pipe_context * pctx,struct pipe_resource * prsc,unsigned level,const struct pipe_box * box,const void * data)934 fd6_clear_texture(struct pipe_context *pctx, struct pipe_resource *prsc,
935                   unsigned level, const struct pipe_box *box, const void *data)
936    assert_dt
937 {
938    struct fd_context *ctx = fd_context(pctx);
939    struct fd_resource *rsc = fd_resource(prsc);
940 
941    if (DEBUG_BLIT) {
942       fprintf(stderr, "surface texture:\ndst resource: ");
943       util_dump_resource(stderr, prsc);
944       fprintf(stderr, "\n");
945    }
946 
947    if (!can_do_clear(prsc, level, box)) {
948       u_default_clear_texture(pctx, prsc, level, box, data);
949       return;
950    }
951 
952    union pipe_color_union color;
953 
954    if (util_format_is_depth_or_stencil(prsc->format)) {
955       const struct util_format_description *desc =
956              util_format_description(prsc->format);
957       float depth = 0.0f;
958       uint8_t stencil = 0;
959 
960       if (util_format_has_depth(desc))
961          util_format_unpack_z_float(prsc->format, &depth, data, 1);
962 
963       if (util_format_has_stencil(desc))
964          util_format_unpack_s_8uint(prsc->format, &stencil, data, 1);
965 
966       if (rsc->stencil)
967          fd6_clear_texture<CHIP>(pctx, &rsc->stencil->b.b, level, box, &stencil);
968 
969       color.f[0] = depth;
970       color.ui[1] = stencil;
971    } else {
972       util_format_unpack_rgba(prsc->format, color.ui, data, 1);
973    }
974 
975    struct fd_batch *batch = fd_bc_alloc_batch(ctx, true);
976 
977    fd_screen_lock(ctx->screen);
978    fd_batch_resource_write(batch, rsc);
979    fd_screen_unlock(ctx->screen);
980 
981    assert(!batch->flushed);
982 
983    /* Marking the batch as needing flush must come after the batch
984     * dependency tracking (resource_read()/resource_write()), as that
985     * can trigger a flush
986     */
987    fd_batch_needs_flush(batch);
988 
989    fd_batch_update_queries(batch);
990 
991    emit_setup(batch);
992 
993    struct pipe_surface surf = {
994          .format = prsc->format,
995          .texture = prsc,
996          .u = {
997                .tex = {
998                      .level = level,
999                      .first_layer = box->z,
1000                      .last_layer = box->depth + box->z - 1,
1001                },
1002          },
1003    };
1004 
1005    fd6_clear_surface<CHIP>(ctx, batch->draw, &surf, box, &color, 0);
1006 
1007    fd6_emit_flushes(batch->ctx, batch->draw,
1008                     FD6_FLUSH_CCU_COLOR |
1009                     FD6_FLUSH_CCU_DEPTH |
1010                     FD6_FLUSH_CACHE |
1011                     FD6_WAIT_FOR_IDLE);
1012 
1013    fd_batch_flush(batch);
1014    fd_batch_reference(&batch, NULL);
1015 
1016    /* Acc query state will have been dirtied by our fd_batch_update_queries, so
1017     * the ctx->batch may need to turn its queries back on.
1018     */
1019    fd_context_dirty(ctx, FD_DIRTY_QUERY);
1020 }
1021 
1022 template <chip CHIP>
1023 void
fd6_resolve_tile(struct fd_batch * batch,struct fd_ringbuffer * ring,uint32_t base,struct pipe_surface * psurf,uint32_t unknown_8c01)1024 fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring,
1025                  uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01)
1026 {
1027    const struct fd_gmem_stateobj *gmem = batch->gmem_state;
1028    uint64_t gmem_base = batch->ctx->screen->gmem_base + base;
1029    uint32_t gmem_pitch = gmem->bin_w * batch->framebuffer.samples *
1030                          util_format_get_blocksize(psurf->format);
1031 
1032    OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
1033    OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
1034    OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(psurf->width - 1) |
1035                      A6XX_GRAS_2D_DST_BR_Y(psurf->height - 1));
1036 
1037    OUT_REG(ring,
1038            A6XX_GRAS_2D_SRC_TL_X(0),
1039            A6XX_GRAS_2D_SRC_BR_X(psurf->width - 1),
1040            A6XX_GRAS_2D_SRC_TL_Y(0),
1041            A6XX_GRAS_2D_SRC_BR_Y(psurf->height - 1),
1042    );
1043 
1044    /* Enable scissor bit, which will take into account the window scissor
1045     * which is set per-tile
1046     */
1047    emit_blit_setup<CHIP>(ring, psurf->format, true, NULL, unknown_8c01, ROTATE_0);
1048 
1049    /* We shouldn't be using GMEM in the layered rendering case: */
1050    assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
1051 
1052    emit_blit_dst(ring, psurf->texture, psurf->format, psurf->u.tex.level,
1053                  psurf->u.tex.first_layer);
1054 
1055    enum a6xx_format sfmt = fd6_color_format(psurf->format, TILE6_LINEAR);
1056    enum a3xx_msaa_samples samples = fd_msaa_samples(batch->framebuffer.samples);
1057 
1058    OUT_REG(ring,
1059            SP_PS_2D_SRC_INFO(
1060                  CHIP,
1061                  .color_format = sfmt,
1062                  .tile_mode = TILE6_2,
1063                  .color_swap = WZYX,
1064                  .srgb = util_format_is_srgb(psurf->format),
1065                  .samples = samples,
1066                  .samples_average = samples > MSAA_ONE,
1067                  .unk20 = true,
1068                  .unk22 = true,
1069            ),
1070            SP_PS_2D_SRC_SIZE(
1071                  CHIP,
1072                  .width = psurf->width,
1073                  .height = psurf->height,
1074            ),
1075            SP_PS_2D_SRC(
1076                  CHIP,
1077                  .qword = gmem_base,
1078            ),
1079            SP_PS_2D_SRC_PITCH(
1080                  CHIP,
1081                  .pitch = gmem_pitch,
1082            ),
1083    );
1084 
1085    /* sync GMEM writes with CACHE. */
1086    fd6_cache_inv(batch, ring);
1087 
1088    /* Wait for CACHE_INVALIDATE to land */
1089    OUT_WFI5(ring);
1090 
1091    OUT_PKT7(ring, CP_BLIT, 1);
1092    OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
1093 
1094    OUT_WFI5(ring);
1095 
1096    /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
1097     * sysmem, and we generally assume that GMEM renderpasses leave their
1098     * results in sysmem, so we need to flush manually here.
1099     */
1100    fd6_emit_flushes(batch->ctx, ring,
1101                     FD6_FLUSH_CCU_COLOR | FD6_WAIT_FOR_IDLE);
1102 }
1103 
1104 template void fd6_resolve_tile<A6XX>(struct fd_batch *batch, struct fd_ringbuffer *ring,
1105                                      uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01);
1106 template void fd6_resolve_tile<A7XX>(struct fd_batch *batch, struct fd_ringbuffer *ring,
1107                                      uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01);
1108 
1109 template <chip CHIP>
1110 static bool
handle_rgba_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1111 handle_rgba_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
1112    assert_dt
1113 {
1114    struct fd_batch *batch;
1115 
1116    assert(!(info->mask & PIPE_MASK_ZS));
1117 
1118    if (!can_do_blit(info))
1119       return false;
1120 
1121    struct fd_resource *src = fd_resource(info->src.resource);
1122    struct fd_resource *dst = fd_resource(info->dst.resource);
1123 
1124    fd6_validate_format(ctx, src, info->src.format);
1125    fd6_validate_format(ctx, dst, info->dst.format);
1126 
1127    batch = fd_bc_alloc_batch(ctx, true);
1128 
1129    fd_screen_lock(ctx->screen);
1130 
1131    fd_batch_resource_read(batch, src);
1132    fd_batch_resource_write(batch, dst);
1133 
1134    fd_screen_unlock(ctx->screen);
1135 
1136    assert(!batch->flushed);
1137 
1138    /* Marking the batch as needing flush must come after the batch
1139     * dependency tracking (resource_read()/resource_write()), as that
1140     * can trigger a flush
1141     */
1142    fd_batch_needs_flush(batch);
1143 
1144    fd_batch_update_queries(batch);
1145 
1146    emit_setup(batch);
1147 
1148    DBG_BLIT(info, batch);
1149 
1150    trace_start_blit(&batch->trace, batch->draw, info->src.resource->target,
1151                     info->dst.resource->target);
1152 
1153    if ((info->src.resource->target == PIPE_BUFFER) &&
1154        (info->dst.resource->target == PIPE_BUFFER)) {
1155       assert(src->layout.tile_mode == TILE6_LINEAR);
1156       assert(dst->layout.tile_mode == TILE6_LINEAR);
1157       emit_blit_buffer<CHIP>(ctx, batch->draw, info);
1158    } else {
1159       /* I don't *think* we need to handle blits between buffer <-> !buffer */
1160       assert(info->src.resource->target != PIPE_BUFFER);
1161       assert(info->dst.resource->target != PIPE_BUFFER);
1162       emit_blit_texture<CHIP>(ctx, batch->draw, info);
1163    }
1164 
1165    trace_end_blit(&batch->trace, batch->draw);
1166 
1167    fd6_emit_flushes(batch->ctx, batch->draw,
1168                     FD6_FLUSH_CCU_COLOR |
1169                     FD6_FLUSH_CCU_DEPTH |
1170                     FD6_FLUSH_CACHE |
1171                     FD6_WAIT_FOR_IDLE);
1172 
1173    fd_batch_flush(batch);
1174    fd_batch_reference(&batch, NULL);
1175 
1176    /* Acc query state will have been dirtied by our fd_batch_update_queries, so
1177     * the ctx->batch may need to turn its queries back on.
1178     */
1179    fd_context_dirty(ctx, FD_DIRTY_QUERY);
1180 
1181    return true;
1182 }
1183 
1184 /**
1185  * Re-written z/s blits can still fail for various reasons (for example MSAA).
1186  * But we want to do the fallback blit with the re-written pipe_blit_info,
1187  * in particular as u_blitter cannot blit stencil.  So handle the fallback
1188  * ourself and never "fail".
1189  */
1190 template <chip CHIP>
1191 static bool
do_rewritten_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1192 do_rewritten_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
1193    assert_dt
1194 {
1195    bool success = handle_rgba_blit<CHIP>(ctx, info);
1196    if (!success) {
1197       success = fd_blitter_blit(ctx, info);
1198    }
1199    assert(success); /* fallback should never fail! */
1200    return success;
1201 }
1202 
1203 /**
1204  * Handle depth/stencil blits either via u_blitter and/or re-writing the
1205  * blit into an equivalent format that we can handle
1206  */
1207 template <chip CHIP>
1208 static bool
handle_zs_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1209 handle_zs_blit(struct fd_context *ctx,
1210                const struct pipe_blit_info *info) assert_dt
1211 {
1212    struct pipe_blit_info blit = *info;
1213 
1214    if (DEBUG_BLIT) {
1215       fprintf(stderr, "---- handle_zs_blit: ");
1216       dump_blit_info(info);
1217    }
1218 
1219    if (info->src.format != info->dst.format)
1220       return false;
1221 
1222    struct fd_resource *src = fd_resource(info->src.resource);
1223    struct fd_resource *dst = fd_resource(info->dst.resource);
1224 
1225    switch (info->dst.format) {
1226    case PIPE_FORMAT_S8_UINT:
1227       assert(info->mask == PIPE_MASK_S);
1228       blit.mask = PIPE_MASK_R;
1229       blit.src.format = PIPE_FORMAT_R8_UINT;
1230       blit.dst.format = PIPE_FORMAT_R8_UINT;
1231       blit.sample0_only = true;
1232       return do_rewritten_blit<CHIP>(ctx, &blit);
1233 
1234    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1235       if (info->mask & PIPE_MASK_Z) {
1236          blit.mask = PIPE_MASK_R;
1237          blit.src.format = PIPE_FORMAT_R32_FLOAT;
1238          blit.dst.format = PIPE_FORMAT_R32_FLOAT;
1239          blit.sample0_only = true;
1240          do_rewritten_blit<CHIP>(ctx, &blit);
1241       }
1242 
1243       if (info->mask & PIPE_MASK_S) {
1244          blit.mask = PIPE_MASK_R;
1245          blit.src.format = PIPE_FORMAT_R8_UINT;
1246          blit.dst.format = PIPE_FORMAT_R8_UINT;
1247          blit.src.resource = &src->stencil->b.b;
1248          blit.dst.resource = &dst->stencil->b.b;
1249          blit.sample0_only = true;
1250          do_rewritten_blit<CHIP>(ctx, &blit);
1251       }
1252 
1253       return true;
1254 
1255    case PIPE_FORMAT_Z16_UNORM:
1256       blit.mask = PIPE_MASK_R;
1257       blit.src.format = PIPE_FORMAT_R16_UNORM;
1258       blit.dst.format = PIPE_FORMAT_R16_UNORM;
1259       blit.sample0_only = true;
1260       return do_rewritten_blit<CHIP>(ctx, &blit);
1261 
1262    case PIPE_FORMAT_Z32_UNORM:
1263    case PIPE_FORMAT_Z32_FLOAT:
1264       assert(info->mask == PIPE_MASK_Z);
1265       blit.mask = PIPE_MASK_R;
1266       blit.src.format = PIPE_FORMAT_R32_UINT;
1267       blit.dst.format = PIPE_FORMAT_R32_UINT;
1268       blit.sample0_only = true;
1269       return do_rewritten_blit<CHIP>(ctx, &blit);
1270 
1271    case PIPE_FORMAT_Z24X8_UNORM:
1272    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1273       blit.mask = 0;
1274       if (info->mask & PIPE_MASK_Z)
1275          blit.mask |= PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B;
1276       if (info->mask & PIPE_MASK_S)
1277          blit.mask |= PIPE_MASK_A;
1278       blit.src.format = PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
1279       blit.dst.format = PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
1280       /* non-UBWC Z24_UNORM_S8_UINT_AS_R8G8B8A8 is broken on a630, fall back to
1281        * 8888_unorm.
1282        */
1283       if (!ctx->screen->info->a6xx.has_z24uint_s8uint) {
1284          if (!src->layout.ubwc && !dst->layout.ubwc) {
1285             blit.src.format = PIPE_FORMAT_RGBA8888_UINT;
1286             blit.dst.format = PIPE_FORMAT_RGBA8888_UINT;
1287          } else {
1288             if (!src->layout.ubwc)
1289                blit.src.format = PIPE_FORMAT_RGBA8888_UNORM;
1290             if (!dst->layout.ubwc)
1291                blit.dst.format = PIPE_FORMAT_RGBA8888_UNORM;
1292          }
1293       }
1294       if (info->src.resource->nr_samples > 1 && blit.src.format != PIPE_FORMAT_RGBA8888_UINT)
1295          blit.sample0_only = true;
1296       return fd_blitter_blit(ctx, &blit);
1297 
1298    default:
1299       return false;
1300    }
1301 }
1302 
1303 template <chip CHIP>
1304 static bool
handle_compressed_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1305 handle_compressed_blit(struct fd_context *ctx,
1306                        const struct pipe_blit_info *info) assert_dt
1307 {
1308    struct pipe_blit_info blit = *info;
1309 
1310    if (DEBUG_BLIT) {
1311       fprintf(stderr, "---- handle_compressed_blit: ");
1312       dump_blit_info(info);
1313    }
1314 
1315    if (info->src.format != info->dst.format)
1316       return fd_blitter_blit(ctx, info);
1317 
1318    if (util_format_get_blocksize(info->src.format) == 8) {
1319       blit.src.format = blit.dst.format = PIPE_FORMAT_R16G16B16A16_UINT;
1320    } else {
1321       assert(util_format_get_blocksize(info->src.format) == 16);
1322       blit.src.format = blit.dst.format = PIPE_FORMAT_R32G32B32A32_UINT;
1323    }
1324 
1325    int bw = util_format_get_blockwidth(info->src.format);
1326    int bh = util_format_get_blockheight(info->src.format);
1327 
1328    /* NOTE: x/y *must* be aligned to block boundary (ie. in
1329     * glCompressedTexSubImage2D()) but width/height may not
1330     * be:
1331     */
1332 
1333    assert((blit.src.box.x % bw) == 0);
1334    assert((blit.src.box.y % bh) == 0);
1335 
1336    blit.src.box.x /= bw;
1337    blit.src.box.y /= bh;
1338    blit.src.box.width = DIV_ROUND_UP(blit.src.box.width, bw);
1339    blit.src.box.height = DIV_ROUND_UP(blit.src.box.height, bh);
1340 
1341    assert((blit.dst.box.x % bw) == 0);
1342    assert((blit.dst.box.y % bh) == 0);
1343 
1344    blit.dst.box.x /= bw;
1345    blit.dst.box.y /= bh;
1346    blit.dst.box.width = DIV_ROUND_UP(blit.dst.box.width, bw);
1347    blit.dst.box.height = DIV_ROUND_UP(blit.dst.box.height, bh);
1348 
1349    return do_rewritten_blit<CHIP>(ctx, &blit);
1350 }
1351 
1352 /**
1353  * For SNORM formats, copy them as the equivalent UNORM format.  If we treat
1354  * them as snorm then the 0x80 (-1.0 snorm8) value will get clamped to 0x81
1355  * (also -1.0), when we're supposed to be memcpying the bits. See
1356  * https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/2917 for discussion.
1357  */
1358 template <chip CHIP>
1359 static bool
handle_snorm_copy_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1360 handle_snorm_copy_blit(struct fd_context *ctx,
1361                        const struct pipe_blit_info *info)
1362    assert_dt
1363 {
1364    /* If we're interpolating the pixels, we can't just treat the values as unorm. */
1365    if (info->filter == PIPE_TEX_FILTER_LINEAR)
1366       return false;
1367 
1368    struct pipe_blit_info blit = *info;
1369 
1370    blit.src.format = blit.dst.format = util_format_snorm_to_unorm(info->src.format);
1371 
1372    return do_rewritten_blit<CHIP>(ctx, &blit);
1373 }
1374 
1375 template <chip CHIP>
1376 static bool
fd6_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1377 fd6_blit(struct fd_context *ctx, const struct pipe_blit_info *info) assert_dt
1378 {
1379    if (info->mask & PIPE_MASK_ZS)
1380       return handle_zs_blit<CHIP>(ctx, info);
1381 
1382    if (util_format_is_compressed(info->src.format) ||
1383        util_format_is_compressed(info->dst.format))
1384       return handle_compressed_blit<CHIP>(ctx, info);
1385 
1386    if ((info->src.format == info->dst.format) &&
1387        util_format_is_snorm(info->src.format))
1388       return handle_snorm_copy_blit<CHIP>(ctx, info);
1389 
1390    return handle_rgba_blit<CHIP>(ctx, info);
1391 }
1392 
1393 template <chip CHIP>
1394 void
fd6_blitter_init(struct pipe_context * pctx)1395 fd6_blitter_init(struct pipe_context *pctx)
1396    disable_thread_safety_analysis
1397 {
1398    struct fd_context *ctx = fd_context(pctx);
1399 
1400    ctx->clear_ubwc = fd6_clear_ubwc<CHIP>;
1401    ctx->validate_format = fd6_validate_format;
1402 
1403    if (FD_DBG(NOBLIT))
1404       return;
1405 
1406    pctx->clear_texture = fd6_clear_texture<CHIP>;
1407    ctx->blit = fd6_blit<CHIP>;
1408 }
1409 
1410 /* Teach the compiler about needed variants: */
1411 template void fd6_blitter_init<A6XX>(struct pipe_context *pctx);
1412 template void fd6_blitter_init<A7XX>(struct pipe_context *pctx);
1413 
1414 unsigned
fd6_tile_mode_for_format(enum pipe_format pfmt)1415 fd6_tile_mode_for_format(enum pipe_format pfmt)
1416 {
1417    /* basically just has to be a format we can blit, so uploads/downloads
1418     * via linear staging buffer works:
1419     */
1420    if (ok_format(pfmt))
1421       return TILE6_3;
1422 
1423    return TILE6_LINEAR;
1424 }
1425 unsigned
fd6_tile_mode(const struct pipe_resource * tmpl)1426 fd6_tile_mode(const struct pipe_resource *tmpl)
1427 {
1428    /* if the mipmap level 0 is still too small to be tiled, then don't
1429     * bother pretending:
1430     */
1431    if ((tmpl->width0 < FDL_MIN_UBWC_WIDTH) &&
1432          !util_format_is_depth_or_stencil(tmpl->format))
1433       return TILE6_LINEAR;
1434 
1435    return fd6_tile_mode_for_format(tmpl->format);
1436 }
1437