1 /*
2 * Copyright (C) 2017 Rob Clark <robclark@freedesktop.org>
3 * Copyright © 2018 Google, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 * Rob Clark <robclark@freedesktop.org>
26 */
27
28 #define FD_BO_NO_HARDPIN 1
29
30 #include "util/format_srgb.h"
31 #include "util/half_float.h"
32 #include "util/u_dump.h"
33 #include "util/u_log.h"
34 #include "util/u_surface.h"
35
36 #include "freedreno_blitter.h"
37 #include "freedreno_fence.h"
38 #include "freedreno_resource.h"
39 #include "freedreno_tracepoints.h"
40
41 #include "fd6_barrier.h"
42 #include "fd6_blitter.h"
43 #include "fd6_emit.h"
44 #include "fd6_pack.h"
45 #include "fd6_resource.h"
46
47 static inline enum a6xx_2d_ifmt
fd6_ifmt(enum a6xx_format fmt)48 fd6_ifmt(enum a6xx_format fmt)
49 {
50 switch (fmt) {
51 case FMT6_A8_UNORM:
52 case FMT6_8_UNORM:
53 case FMT6_8_SNORM:
54 case FMT6_8_8_UNORM:
55 case FMT6_8_8_SNORM:
56 case FMT6_8_8_8_8_UNORM:
57 case FMT6_8_8_8_X8_UNORM:
58 case FMT6_8_8_8_8_SNORM:
59 case FMT6_4_4_4_4_UNORM:
60 case FMT6_5_5_5_1_UNORM:
61 case FMT6_5_6_5_UNORM:
62 return R2D_UNORM8;
63
64 case FMT6_32_UINT:
65 case FMT6_32_SINT:
66 case FMT6_32_32_UINT:
67 case FMT6_32_32_SINT:
68 case FMT6_32_32_32_32_UINT:
69 case FMT6_32_32_32_32_SINT:
70 return R2D_INT32;
71
72 case FMT6_16_UINT:
73 case FMT6_16_SINT:
74 case FMT6_16_16_UINT:
75 case FMT6_16_16_SINT:
76 case FMT6_16_16_16_16_UINT:
77 case FMT6_16_16_16_16_SINT:
78 case FMT6_10_10_10_2_UINT:
79 return R2D_INT16;
80
81 case FMT6_8_UINT:
82 case FMT6_8_SINT:
83 case FMT6_8_8_UINT:
84 case FMT6_8_8_SINT:
85 case FMT6_8_8_8_8_UINT:
86 case FMT6_8_8_8_8_SINT:
87 case FMT6_Z24_UNORM_S8_UINT:
88 case FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
89 return R2D_INT8;
90
91 case FMT6_16_UNORM:
92 case FMT6_16_SNORM:
93 case FMT6_16_16_UNORM:
94 case FMT6_16_16_SNORM:
95 case FMT6_16_16_16_16_UNORM:
96 case FMT6_16_16_16_16_SNORM:
97 case FMT6_32_FLOAT:
98 case FMT6_32_32_FLOAT:
99 case FMT6_32_32_32_32_FLOAT:
100 return R2D_FLOAT32;
101
102 case FMT6_16_FLOAT:
103 case FMT6_16_16_FLOAT:
104 case FMT6_16_16_16_16_FLOAT:
105 case FMT6_11_11_10_FLOAT:
106 case FMT6_10_10_10_2_UNORM_DEST:
107 return R2D_FLOAT16;
108
109 default:
110 unreachable("bad format");
111 return (enum a6xx_2d_ifmt)0;
112 }
113 }
114
115 /* Make sure none of the requested dimensions extend beyond the size of the
116 * resource. Not entirely sure why this happens, but sometimes it does, and
117 * w/ 2d blt doesn't have wrap modes like a sampler, so force those cases
118 * back to u_blitter
119 */
120 static bool
ok_dims(const struct pipe_resource * r,const struct pipe_box * b,int lvl)121 ok_dims(const struct pipe_resource *r, const struct pipe_box *b, int lvl)
122 {
123 int last_layer =
124 r->target == PIPE_TEXTURE_3D ? u_minify(r->depth0, lvl) : r->array_size;
125
126 return (b->x >= 0) && (b->x + b->width <= u_minify(r->width0, lvl)) &&
127 (b->y >= 0) && (b->y + b->height <= u_minify(r->height0, lvl)) &&
128 (b->z >= 0) && (b->z + b->depth <= last_layer);
129 }
130
131 static bool
ok_format(enum pipe_format pfmt)132 ok_format(enum pipe_format pfmt)
133 {
134 enum a6xx_format fmt = fd6_color_format(pfmt, TILE6_LINEAR);
135
136 if (util_format_is_compressed(pfmt))
137 return true;
138
139 switch (pfmt) {
140 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
141 case PIPE_FORMAT_Z24X8_UNORM:
142 case PIPE_FORMAT_Z16_UNORM:
143 case PIPE_FORMAT_Z32_UNORM:
144 case PIPE_FORMAT_Z32_FLOAT:
145 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
146 case PIPE_FORMAT_S8_UINT:
147 return true;
148 default:
149 break;
150 }
151
152 if (fmt == FMT6_NONE)
153 return false;
154
155 return true;
156 }
157
/* Compile-time debug switches.  When DEBUG_BLIT is non-zero the blit and
 * clear paths in this file dump what they were asked to do to stderr.  When
 * DEBUG_BLIT_FALLBACK is non-zero, fail_if() reports which check rejected
 * the blit.
 */
#define DEBUG_BLIT 0
#define DEBUG_BLIT_FALLBACK 0

/* Return false from the enclosing function if 'cond' holds.
 *
 * The macro refers to a variable named 'info' (passed to dump_blit_info())
 * which must be in scope wherever it is used.  The stringified condition is
 * what gets printed, so the spelling of 'cond' is part of the debug output.
 */
#define fail_if(cond)                                                          \
   do {                                                                        \
      if (cond) {                                                              \
         if (DEBUG_BLIT_FALLBACK) {                                            \
            fprintf(stderr, "falling back: %s for blit:\n", #cond);            \
            dump_blit_info(info);                                              \
         }                                                                     \
         return false;                                                         \
      }                                                                        \
   } while (0)
171
172 static bool
is_ubwc(struct pipe_resource * prsc,unsigned level)173 is_ubwc(struct pipe_resource *prsc, unsigned level)
174 {
175 return fd_resource_ubwc_enabled(fd_resource(prsc), level);
176 }
177
178 static void
dump_blit_info(const struct pipe_blit_info * info)179 dump_blit_info(const struct pipe_blit_info *info)
180 {
181 util_dump_blit_info(stderr, info);
182 fprintf(stderr, "\ndst resource: ");
183 util_dump_resource(stderr, info->dst.resource);
184 if (is_ubwc(info->dst.resource, info->dst.level))
185 fprintf(stderr, " (ubwc)");
186 fprintf(stderr, "\nsrc resource: ");
187 util_dump_resource(stderr, info->src.resource);
188 if (is_ubwc(info->src.resource, info->src.level))
189 fprintf(stderr, " (ubwc)");
190 fprintf(stderr, "\n");
191 }
192
/* Decide whether a blit can be handled by the 2D engine path in this file.
 *
 * Returns true if every check passes.  Each fail_if() returns false as soon
 * as its condition holds (and, with DEBUG_BLIT_FALLBACK, reports the
 * stringified condition), so the order and spelling of the checks decide
 * what is reported.
 */
static bool
can_do_blit(const struct pipe_blit_info *info)
{
   /* I think we can do scaling, but not in z dimension since that would
    * require blending..
    */
   fail_if(info->dst.box.depth != info->src.box.depth);

   /* Fail if unsupported format: */
   fail_if(!ok_format(info->src.format));
   fail_if(!ok_format(info->dst.format));

   /* ok_format() lets compressed formats through, but they are not expected
    * to reach this point:
    */
   assert(!util_format_is_compressed(info->src.format));
   assert(!util_format_is_compressed(info->dst.format));

   /* Both boxes have to lie within their resource: */
   fail_if(!ok_dims(info->src.resource, &info->src.box, info->src.level));

   fail_if(!ok_dims(info->dst.resource, &info->dst.box, info->dst.level));

   assert(info->dst.box.width >= 0);
   assert(info->dst.box.height >= 0);
   assert(info->dst.box.depth >= 0);

   /* Multisampled destinations are not handled here: */
   fail_if(info->dst.resource->nr_samples > 1);

   fail_if(info->window_rectangle_include);

   const struct util_format_description *src_desc =
      util_format_description(info->src.format);
   const struct util_format_description *dst_desc =
      util_format_description(info->dst.format);
   const int common_channels =
      MIN2(src_desc->nr_channels, dst_desc->nr_channels);

   /* For color blits, the channels that src and dst have in common must
    * have identical channel descriptions:
    */
   if (info->mask & PIPE_MASK_RGBA) {
      for (int i = 0; i < common_channels; i++) {
         fail_if(memcmp(&src_desc->channel[i], &dst_desc->channel[i],
                        sizeof(src_desc->channel[0])));
      }
   }

   fail_if(info->alpha_blend);

   return true;
}
238
239 static bool
can_do_clear(const struct pipe_resource * prsc,unsigned level,const struct pipe_box * box)240 can_do_clear(const struct pipe_resource *prsc, unsigned level,
241 const struct pipe_box *box)
242 {
243 return ok_format(prsc->format) &&
244 ok_dims(prsc, box, level) &&
245 (fd_resource_nr_samples(prsc) == 1);
246
247 return true;
248 }
249
250 static void
emit_setup(struct fd_batch * batch)251 emit_setup(struct fd_batch *batch)
252 {
253 struct fd_ringbuffer *ring = batch->draw;
254 struct fd_screen *screen = batch->ctx->screen;
255
256 fd6_emit_flushes(batch->ctx, ring,
257 FD6_FLUSH_CCU_COLOR |
258 FD6_INVALIDATE_CCU_COLOR |
259 FD6_FLUSH_CCU_DEPTH |
260 FD6_INVALIDATE_CCU_DEPTH);
261
262 /* normal BLIT_OP_SCALE operation needs bypass RB_CCU_CNTL */
263 OUT_WFI5(ring);
264 fd6_emit_ccu_cntl(ring, screen, false);
265 }
266
267 template <chip CHIP>
268 static void
emit_blit_setup(struct fd_ringbuffer * ring,enum pipe_format pfmt,bool scissor_enable,union pipe_color_union * color,uint32_t unknown_8c01,enum a6xx_rotation rotate)269 emit_blit_setup(struct fd_ringbuffer *ring, enum pipe_format pfmt,
270 bool scissor_enable, union pipe_color_union *color,
271 uint32_t unknown_8c01, enum a6xx_rotation rotate)
272 {
273 enum a6xx_format fmt = fd6_color_format(pfmt, TILE6_LINEAR);
274 bool is_srgb = util_format_is_srgb(pfmt);
275 enum a6xx_2d_ifmt ifmt = fd6_ifmt(fmt);
276
277 if (is_srgb) {
278 assert(ifmt == R2D_UNORM8);
279 ifmt = R2D_UNORM8_SRGB;
280 }
281
282 uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL_MASK(0xf) |
283 A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt) |
284 A6XX_RB_2D_BLIT_CNTL_IFMT(ifmt) |
285 A6XX_RB_2D_BLIT_CNTL_ROTATE(rotate) |
286 COND(color, A6XX_RB_2D_BLIT_CNTL_SOLID_COLOR) |
287 COND(scissor_enable, A6XX_RB_2D_BLIT_CNTL_SCISSOR);
288
289 OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1);
290 OUT_RING(ring, blit_cntl);
291
292 OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
293 OUT_RING(ring, blit_cntl);
294
295 if (fmt == FMT6_10_10_10_2_UNORM_DEST)
296 fmt = FMT6_16_16_16_16_FLOAT;
297
298 /* This register is probably badly named... it seems that it's
299 * controlling the internal/accumulator format or something like
300 * that. It's certainly not tied to only the src format.
301 */
302 OUT_REG(ring, SP_2D_DST_FORMAT(
303 CHIP,
304 .sint = util_format_is_pure_sint(pfmt),
305 .uint = util_format_is_pure_uint(pfmt),
306 .color_format = fmt,
307 .srgb = is_srgb,
308 .mask = 0xf,
309 ));
310
311 OUT_PKT4(ring, REG_A6XX_RB_2D_UNKNOWN_8C01, 1);
312 OUT_RING(ring, unknown_8c01);
313 }
314
315 static void
emit_blit_buffer_dst(struct fd_ringbuffer * ring,struct fd_resource * dst,unsigned off,unsigned size)316 emit_blit_buffer_dst(struct fd_ringbuffer *ring, struct fd_resource *dst,
317 unsigned off, unsigned size)
318 {
319 OUT_REG(ring,
320 A6XX_RB_2D_DST_INFO(
321 .color_format = FMT6_8_UNORM,
322 .tile_mode = TILE6_LINEAR,
323 .color_swap = WZYX,
324 ),
325 A6XX_RB_2D_DST(
326 .bo = dst->bo,
327 .bo_offset = off,
328 ),
329 A6XX_RB_2D_DST_PITCH(size),
330 );
331 }
332
333 /* buffers need to be handled specially since x/width can exceed the bounds
334 * supported by hw.. if necessary decompose into (potentially) two 2D blits
335 */
336 template <chip CHIP>
337 static void
emit_blit_buffer(struct fd_context * ctx,struct fd_ringbuffer * ring,const struct pipe_blit_info * info)338 emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring,
339 const struct pipe_blit_info *info)
340 {
341 const struct pipe_box *sbox = &info->src.box;
342 const struct pipe_box *dbox = &info->dst.box;
343 struct fd_resource *src, *dst;
344 unsigned sshift, dshift;
345
346 if (DEBUG_BLIT) {
347 fprintf(stderr, "buffer blit: ");
348 dump_blit_info(info);
349 }
350
351 src = fd_resource(info->src.resource);
352 dst = fd_resource(info->dst.resource);
353
354 assert(src->layout.cpp == 1);
355 assert(dst->layout.cpp == 1);
356 assert(info->src.resource->format == info->dst.resource->format);
357 assert((sbox->y == 0) && (sbox->height == 1));
358 assert((dbox->y == 0) && (dbox->height == 1));
359 assert((sbox->z == 0) && (sbox->depth == 1));
360 assert((dbox->z == 0) && (dbox->depth == 1));
361 assert(sbox->width == dbox->width);
362 assert(info->src.level == 0);
363 assert(info->dst.level == 0);
364
365 /*
366 * Buffers can have dimensions bigger than max width, remap into
367 * multiple 1d blits to fit within max dimension
368 *
369 * Note that blob uses .ARRAY_PITCH=128 for blitting buffers, which
370 * seems to prevent overfetch related faults. Not quite sure what
371 * the deal is there.
372 *
373 * Low 6 bits of SRC/DST addresses need to be zero (ie. address
374 * aligned to 64) so we need to shift src/dst x1/x2 to make up the
375 * difference. On top of already splitting up the blit so width
376 * isn't > 16k.
377 *
378 * We perhaps could do a bit better, if src and dst are aligned but
379 * in the worst case this means we have to split the copy up into
380 * 16k (0x4000) minus 64 (0x40).
381 */
382
383 sshift = sbox->x & 0x3f;
384 dshift = dbox->x & 0x3f;
385
386 emit_blit_setup<CHIP>(ring, PIPE_FORMAT_R8_UNORM, false, NULL, 0, ROTATE_0);
387
388 for (unsigned off = 0; off < sbox->width; off += (0x4000 - 0x40)) {
389 unsigned soff, doff, w, p;
390
391 soff = (sbox->x + off) & ~0x3f;
392 doff = (dbox->x + off) & ~0x3f;
393
394 w = MIN2(sbox->width - off, (0x4000 - 0x40));
395 p = align(w, 64);
396
397 assert((soff + w) <= fd_bo_size(src->bo));
398 assert((doff + w) <= fd_bo_size(dst->bo));
399
400 /*
401 * Emit source:
402 */
403 OUT_REG(ring,
404 SP_PS_2D_SRC_INFO(
405 CHIP,
406 .color_format = FMT6_8_UNORM,
407 .tile_mode = TILE6_LINEAR,
408 .color_swap = WZYX,
409 .unk20 = true,
410 .unk22 = true,
411 ),
412 SP_PS_2D_SRC_SIZE(
413 CHIP,
414 .width = sshift + w,
415 .height = 1,
416 ),
417 SP_PS_2D_SRC(
418 CHIP,
419 .bo = src->bo,
420 .bo_offset = soff,
421 ),
422 SP_PS_2D_SRC_PITCH(
423 CHIP,
424 .pitch = p,
425 ),
426 );
427
428 /*
429 * Emit destination:
430 */
431 emit_blit_buffer_dst(ring, dst, doff, p);
432
433 /*
434 * Blit command:
435 */
436 OUT_REG(ring,
437 A6XX_GRAS_2D_SRC_TL_X(sshift),
438 A6XX_GRAS_2D_SRC_BR_X(sshift + w - 1),
439 A6XX_GRAS_2D_SRC_TL_Y(0),
440 A6XX_GRAS_2D_SRC_BR_Y(0),
441 );
442
443 OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
444 OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(dshift) | A6XX_GRAS_2D_DST_TL_Y(0));
445 OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(dshift + w - 1) |
446 A6XX_GRAS_2D_DST_BR_Y(0));
447
448 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
449 OUT_RING(ring, LABEL);
450 OUT_WFI5(ring);
451
452 OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
453 OUT_RING(ring, ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL_blit);
454
455 OUT_PKT7(ring, CP_BLIT, 1);
456 OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
457
458 OUT_WFI5(ring);
459
460 OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
461 OUT_RING(ring, 0); /* RB_DBG_ECO_CNTL */
462 }
463 }
464
465 template <chip CHIP>
466 static void
fd6_clear_ubwc(struct fd_batch * batch,struct fd_resource * rsc)467 fd6_clear_ubwc(struct fd_batch *batch, struct fd_resource *rsc) assert_dt
468 {
469 struct fd_ringbuffer *ring = fd_batch_get_prologue(batch);
470 union pipe_color_union color = {};
471
472 emit_blit_setup<CHIP>(ring, PIPE_FORMAT_R8_UNORM, false, &color, 0, ROTATE_0);
473
474 OUT_REG(ring,
475 SP_PS_2D_SRC_INFO(CHIP),
476 SP_PS_2D_SRC_SIZE(CHIP),
477 SP_PS_2D_SRC(CHIP),
478 SP_PS_2D_SRC_PITCH(CHIP),
479 );
480
481 OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
482 OUT_RING(ring, 0x00000000);
483 OUT_RING(ring, 0x00000000);
484 OUT_RING(ring, 0x00000000);
485 OUT_RING(ring, 0x00000000);
486
487 OUT_REG(ring,
488 A6XX_GRAS_2D_SRC_TL_X(0),
489 A6XX_GRAS_2D_SRC_BR_X(0),
490 A6XX_GRAS_2D_SRC_TL_Y(0),
491 A6XX_GRAS_2D_SRC_BR_Y(0),
492 );
493
494 unsigned size = rsc->layout.slices[0].offset;
495 unsigned offset = 0;
496
497 /* We could be more clever here and realize that we could use a
498 * larger width if the size is aligned to something more than a
499 * single page.. or even use a format larger than r8 in those
500 * cases. But for normal sized textures and even up to 16k x 16k
501 * at <= 4byte/pixel, we'll only go thru the loop once
502 */
503 const unsigned w = 0x1000;
504
505 /* ubwc size should always be page aligned: */
506 assert((size % w) == 0);
507
508 while (size > 0) {
509 const unsigned h = MIN2(0x4000, size / w);
510 /* width is already aligned to a suitable pitch: */
511 const unsigned p = w;
512
513 /*
514 * Emit destination:
515 */
516 emit_blit_buffer_dst(ring, rsc, offset, p);
517
518 /*
519 * Blit command:
520 */
521
522 OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
523 OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
524 OUT_RING(ring,
525 A6XX_GRAS_2D_DST_BR_X(w - 1) | A6XX_GRAS_2D_DST_BR_Y(h - 1));
526
527 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
528 OUT_RING(ring, LABEL);
529 OUT_WFI5(ring);
530
531 OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
532 OUT_RING(ring, batch->ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL_blit);
533
534 OUT_PKT7(ring, CP_BLIT, 1);
535 OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
536
537 OUT_WFI5(ring);
538
539 OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
540 OUT_RING(ring, 0); /* RB_DBG_ECO_CNTL */
541
542 offset += w * h;
543 size -= w * h;
544 }
545
546 fd6_emit_flushes(batch->ctx, ring,
547 FD6_FLUSH_CCU_COLOR |
548 FD6_FLUSH_CCU_DEPTH |
549 FD6_FLUSH_CACHE |
550 FD6_WAIT_FOR_IDLE);
551 }
552
553 static void
emit_blit_dst(struct fd_ringbuffer * ring,struct pipe_resource * prsc,enum pipe_format pfmt,unsigned level,unsigned layer)554 emit_blit_dst(struct fd_ringbuffer *ring, struct pipe_resource *prsc,
555 enum pipe_format pfmt, unsigned level, unsigned layer)
556 {
557 struct fd_resource *dst = fd_resource(prsc);
558 enum a6xx_format fmt =
559 fd6_color_format(pfmt, (enum a6xx_tile_mode)dst->layout.tile_mode);
560 enum a6xx_tile_mode tile =
561 (enum a6xx_tile_mode)fd_resource_tile_mode(prsc, level);
562 enum a3xx_color_swap swap =
563 fd6_color_swap(pfmt, (enum a6xx_tile_mode)dst->layout.tile_mode);
564 uint32_t pitch = fd_resource_pitch(dst, level);
565 bool ubwc_enabled = fd_resource_ubwc_enabled(dst, level);
566 unsigned off = fd_resource_offset(dst, level, layer);
567
568 if (fmt == FMT6_Z24_UNORM_S8_UINT)
569 fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
570
571 OUT_REG(ring,
572 A6XX_RB_2D_DST_INFO(
573 .color_format = fmt,
574 .tile_mode = tile,
575 .color_swap = swap,
576 .flags = ubwc_enabled,
577 .srgb = util_format_is_srgb(pfmt),
578 ),
579 A6XX_RB_2D_DST(
580 .bo = dst->bo,
581 .bo_offset = off,
582 ),
583 A6XX_RB_2D_DST_PITCH(pitch),
584 );
585
586 if (ubwc_enabled) {
587 OUT_PKT4(ring, REG_A6XX_RB_2D_DST_FLAGS, 6);
588 fd6_emit_flag_reference(ring, dst, level, layer);
589 OUT_RING(ring, 0x00000000);
590 OUT_RING(ring, 0x00000000);
591 OUT_RING(ring, 0x00000000);
592 }
593 }
594
595 template <chip CHIP>
596 static void
emit_blit_src(struct fd_ringbuffer * ring,const struct pipe_blit_info * info,unsigned layer,unsigned nr_samples)597 emit_blit_src(struct fd_ringbuffer *ring, const struct pipe_blit_info *info,
598 unsigned layer, unsigned nr_samples)
599 {
600 struct fd_resource *src = fd_resource(info->src.resource);
601 enum a6xx_format sfmt =
602 fd6_texture_format(info->src.format, (enum a6xx_tile_mode)src->layout.tile_mode);
603 enum a6xx_tile_mode stile =
604 (enum a6xx_tile_mode)fd_resource_tile_mode(info->src.resource, info->src.level);
605 enum a3xx_color_swap sswap =
606 fd6_texture_swap(info->src.format, (enum a6xx_tile_mode)src->layout.tile_mode);
607 uint32_t pitch = fd_resource_pitch(src, info->src.level);
608 bool subwc_enabled = fd_resource_ubwc_enabled(src, info->src.level);
609 unsigned soff = fd_resource_offset(src, info->src.level, layer);
610 uint32_t width = u_minify(src->b.b.width0, info->src.level) * nr_samples;
611 uint32_t height = u_minify(src->b.b.height0, info->src.level);
612 enum a3xx_msaa_samples samples = fd_msaa_samples(src->b.b.nr_samples);
613
614 if (info->src.format == PIPE_FORMAT_A8_UNORM)
615 sfmt = FMT6_A8_UNORM;
616
617 OUT_REG(ring,
618 SP_PS_2D_SRC_INFO(
619 CHIP,
620 .color_format = sfmt,
621 .tile_mode = stile,
622 .color_swap = sswap,
623 .flags = subwc_enabled,
624 .srgb = util_format_is_srgb(info->src.format),
625 .samples = samples,
626 .filter = (info->filter == PIPE_TEX_FILTER_LINEAR),
627 .samples_average = (samples > MSAA_ONE) && !info->sample0_only,
628 .unk20 = true,
629 .unk22 = true,
630 ),
631 SP_PS_2D_SRC_SIZE(
632 CHIP,
633 .width = width,
634 .height = height,
635 ),
636 SP_PS_2D_SRC(
637 CHIP,
638 .bo = src->bo,
639 .bo_offset = soff,
640 ),
641 SP_PS_2D_SRC_PITCH(
642 CHIP,
643 .pitch = pitch,
644 ),
645 );
646
647 if (subwc_enabled && fd_resource_ubwc_enabled(src, info->src.level)) {
648 OUT_REG(ring,
649 SP_PS_2D_SRC_FLAGS(
650 CHIP,
651 .bo = src->bo,
652 .bo_offset = fd_resource_ubwc_offset(src, info->src.level, layer),
653 ),
654 SP_PS_2D_SRC_FLAGS_PITCH(
655 CHIP, fdl_ubwc_pitch(&src->layout, info->src.level)),
656 );
657 }
658 }
659
660 template <chip CHIP>
661 static void
emit_blit_texture(struct fd_context * ctx,struct fd_ringbuffer * ring,const struct pipe_blit_info * info)662 emit_blit_texture(struct fd_context *ctx, struct fd_ringbuffer *ring,
663 const struct pipe_blit_info *info)
664 {
665 const struct pipe_box *sbox = &info->src.box;
666 const struct pipe_box *dbox = &info->dst.box;
667 struct fd_resource *dst;
668 int sx1, sy1, sx2, sy2;
669 int dx1, dy1, dx2, dy2;
670
671 if (DEBUG_BLIT) {
672 fprintf(stderr, "texture blit: ");
673 dump_blit_info(info);
674 }
675
676 dst = fd_resource(info->dst.resource);
677
678 uint32_t nr_samples = fd_resource_nr_samples(&dst->b.b);
679
680 sx1 = sbox->x * nr_samples;
681 sy1 = sbox->y;
682 sx2 = (sbox->x + sbox->width) * nr_samples;
683 sy2 = sbox->y + sbox->height;
684
685 dx1 = dbox->x * nr_samples;
686 dy1 = dbox->y;
687 dx2 = (dbox->x + dbox->width) * nr_samples;
688 dy2 = dbox->y + dbox->height;
689
690 static const enum a6xx_rotation rotates[2][2] = {
691 {ROTATE_0, ROTATE_HFLIP},
692 {ROTATE_VFLIP, ROTATE_180},
693 };
694 bool mirror_x = (sx2 < sx1) != (dx2 < dx1);
695 bool mirror_y = (sy2 < sy1) != (dy2 < dy1);
696
697 enum a6xx_rotation rotate = rotates[mirror_y][mirror_x];
698
699 OUT_REG(ring,
700 A6XX_GRAS_2D_SRC_TL_X(MIN2(sx1, sx2)),
701 A6XX_GRAS_2D_SRC_BR_X(MAX2(sx1, sx2) - 1),
702 A6XX_GRAS_2D_SRC_TL_Y(MIN2(sy1, sy2)),
703 A6XX_GRAS_2D_SRC_BR_Y(MAX2(sy1, sy2) - 1),
704 );
705
706 OUT_REG(ring,
707 A6XX_GRAS_2D_DST_TL(.x = MIN2(dx1, dx2),
708 .y = MIN2(dy1, dy2)),
709 A6XX_GRAS_2D_DST_BR(.x = MAX2(dx1, dx2) - 1,
710 .y = MAX2(dy1, dy2) - 1),
711 );
712
713 if (info->scissor_enable) {
714 OUT_PKT4(ring, REG_A6XX_GRAS_2D_RESOLVE_CNTL_1, 2);
715 OUT_RING(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1_X(info->scissor.minx) |
716 A6XX_GRAS_2D_RESOLVE_CNTL_1_Y(info->scissor.miny));
717 OUT_RING(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1_X(info->scissor.maxx - 1) |
718 A6XX_GRAS_2D_RESOLVE_CNTL_1_Y(info->scissor.maxy - 1));
719 }
720
721 emit_blit_setup<CHIP>(ring, info->dst.format, info->scissor_enable, NULL, 0, rotate);
722
723 for (unsigned i = 0; i < info->dst.box.depth; i++) {
724
725 emit_blit_src<CHIP>(ring, info, sbox->z + i, nr_samples);
726 emit_blit_dst(ring, info->dst.resource, info->dst.format, info->dst.level,
727 dbox->z + i);
728
729 /*
730 * Blit command:
731 */
732 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
733 OUT_RING(ring, LABEL);
734 OUT_WFI5(ring);
735
736 OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
737 OUT_RING(ring, ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL_blit);
738
739 OUT_PKT7(ring, CP_BLIT, 1);
740 OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
741
742 OUT_WFI5(ring);
743
744 OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
745 OUT_RING(ring, 0); /* RB_DBG_ECO_CNTL */
746 }
747 }
748
749 static void
emit_clear_color(struct fd_ringbuffer * ring,enum pipe_format pfmt,union pipe_color_union * color)750 emit_clear_color(struct fd_ringbuffer *ring, enum pipe_format pfmt,
751 union pipe_color_union *color)
752 {
753 switch (pfmt) {
754 case PIPE_FORMAT_Z24X8_UNORM:
755 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
756 case PIPE_FORMAT_X24S8_UINT: {
757 uint32_t depth_unorm24 = color->f[0] * ((1u << 24) - 1);
758 uint8_t stencil = color->ui[1];
759 color->ui[0] = depth_unorm24 & 0xff;
760 color->ui[1] = (depth_unorm24 >> 8) & 0xff;
761 color->ui[2] = (depth_unorm24 >> 16) & 0xff;
762 color->ui[3] = stencil;
763 break;
764 }
765 default:
766 break;
767 }
768
769 OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
770 switch (fd6_ifmt(fd6_color_format(pfmt, TILE6_LINEAR))) {
771 case R2D_UNORM8:
772 case R2D_UNORM8_SRGB:
773 /* The r2d ifmt is badly named, it also covers the signed case: */
774 if (util_format_is_snorm(pfmt)) {
775 OUT_RING(ring, float_to_byte_tex(color->f[0]));
776 OUT_RING(ring, float_to_byte_tex(color->f[1]));
777 OUT_RING(ring, float_to_byte_tex(color->f[2]));
778 OUT_RING(ring, float_to_byte_tex(color->f[3]));
779 } else {
780 OUT_RING(ring, float_to_ubyte(color->f[0]));
781 OUT_RING(ring, float_to_ubyte(color->f[1]));
782 OUT_RING(ring, float_to_ubyte(color->f[2]));
783 OUT_RING(ring, float_to_ubyte(color->f[3]));
784 }
785 break;
786 case R2D_FLOAT16:
787 OUT_RING(ring, _mesa_float_to_half(color->f[0]));
788 OUT_RING(ring, _mesa_float_to_half(color->f[1]));
789 OUT_RING(ring, _mesa_float_to_half(color->f[2]));
790 OUT_RING(ring, _mesa_float_to_half(color->f[3]));
791 break;
792 case R2D_FLOAT32:
793 case R2D_INT32:
794 case R2D_INT16:
795 case R2D_INT8:
796 default:
797 OUT_RING(ring, color->ui[0]);
798 OUT_RING(ring, color->ui[1]);
799 OUT_RING(ring, color->ui[2]);
800 OUT_RING(ring, color->ui[3]);
801 break;
802 }
803 }
804
805 template <chip CHIP>
806 void
fd6_clear_lrz(struct fd_batch * batch,struct fd_resource * zsbuf,struct fd_bo * lrz,double depth)807 fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf,
808 struct fd_bo *lrz, double depth)
809 {
810 struct fd_ringbuffer *ring = fd_batch_get_prologue(batch);
811
812 if (DEBUG_BLIT) {
813 fprintf(stderr, "lrz clear:\ndst resource: ");
814 util_dump_resource(stderr, &zsbuf->b.b);
815 fprintf(stderr, "\n");
816 }
817
818 OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
819 OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
820 OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(zsbuf->lrz_width - 1) |
821 A6XX_GRAS_2D_DST_BR_Y(zsbuf->lrz_height - 1));
822
823 union pipe_color_union clear_color = { .f = {depth} };
824
825 emit_clear_color(ring, PIPE_FORMAT_Z16_UNORM, &clear_color);
826 emit_blit_setup<CHIP>(ring, PIPE_FORMAT_Z16_UNORM, false, &clear_color, 0, ROTATE_0);
827
828 OUT_REG(ring,
829 A6XX_RB_2D_DST_INFO(
830 .color_format = FMT6_16_UNORM,
831 .tile_mode = TILE6_LINEAR,
832 .color_swap = WZYX,
833 ),
834 A6XX_RB_2D_DST(
835 .bo = lrz,
836 ),
837 A6XX_RB_2D_DST_PITCH(zsbuf->lrz_pitch * 2),
838 );
839
840 /*
841 * Blit command:
842 */
843
844 OUT_PKT7(ring, CP_BLIT, 1);
845 OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
846 }
847
848 template void fd6_clear_lrz<A6XX>(struct fd_batch *batch, struct fd_resource *zsbuf, struct fd_bo *lrz, double depth);
849 template void fd6_clear_lrz<A7XX>(struct fd_batch *batch, struct fd_resource *zsbuf, struct fd_bo *lrz, double depth);
850
851 /**
852 * Handle conversion of clear color
853 */
854 static union pipe_color_union
convert_color(enum pipe_format format,union pipe_color_union * pcolor)855 convert_color(enum pipe_format format, union pipe_color_union *pcolor)
856 {
857 union pipe_color_union color = *pcolor;
858
859 /* For solid-fill blits, the hw isn't going to convert from
860 * linear to srgb for us:
861 */
862 if (util_format_is_srgb(format)) {
863 for (int i = 0; i < 3; i++)
864 color.f[i] = util_format_linear_to_srgb_float(color.f[i]);
865 }
866
867 if (util_format_is_snorm(format)) {
868 for (int i = 0; i < 3; i++)
869 color.f[i] = CLAMP(color.f[i], -1.0f, 1.0f);
870 }
871
872 /* Note that float_to_ubyte() already clamps, for the unorm case */
873
874 return color;
875 }
876
877 template <chip CHIP>
878 void
fd6_clear_surface(struct fd_context * ctx,struct fd_ringbuffer * ring,struct pipe_surface * psurf,const struct pipe_box * box2d,union pipe_color_union * color,uint32_t unknown_8c01)879 fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring,
880 struct pipe_surface *psurf, const struct pipe_box *box2d,
881 union pipe_color_union *color, uint32_t unknown_8c01)
882 {
883 if (DEBUG_BLIT) {
884 fprintf(stderr, "surface clear:\ndst resource: ");
885 util_dump_resource(stderr, psurf->texture);
886 fprintf(stderr, "\n");
887 }
888
889 uint32_t nr_samples = fd_resource_nr_samples(psurf->texture);
890 OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
891 OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(box2d->x * nr_samples) |
892 A6XX_GRAS_2D_DST_TL_Y(box2d->y));
893 OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X((box2d->x + box2d->width) * nr_samples - 1) |
894 A6XX_GRAS_2D_DST_BR_Y(box2d->y + box2d->height - 1));
895
896 union pipe_color_union clear_color = convert_color(psurf->format, color);
897
898 emit_clear_color(ring, psurf->format, &clear_color);
899 emit_blit_setup<CHIP>(ring, psurf->format, false, &clear_color, unknown_8c01, ROTATE_0);
900
901 for (unsigned i = psurf->u.tex.first_layer; i <= psurf->u.tex.last_layer;
902 i++) {
903 emit_blit_dst(ring, psurf->texture, psurf->format, psurf->u.tex.level, i);
904
905 /*
906 * Blit command:
907 */
908 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
909 OUT_RING(ring, LABEL);
910 OUT_WFI5(ring);
911
912 OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
913 OUT_RING(ring, ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL_blit);
914
915 OUT_PKT7(ring, CP_BLIT, 1);
916 OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
917
918 OUT_WFI5(ring);
919
920 OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
921 OUT_RING(ring, 0); /* RB_DBG_ECO_CNTL */
922 }
923 }
924
925 template void fd6_clear_surface<A6XX>(struct fd_context *ctx, struct fd_ringbuffer *ring,
926 struct pipe_surface *psurf, const struct pipe_box *box2d,
927 union pipe_color_union *color, uint32_t unknown_8c01);
928 template void fd6_clear_surface<A7XX>(struct fd_context *ctx, struct fd_ringbuffer *ring,
929 struct pipe_surface *psurf, const struct pipe_box *box2d,
930 union pipe_color_union *color, uint32_t unknown_8c01);
931
932 template <chip CHIP>
933 static void
fd6_clear_texture(struct pipe_context * pctx,struct pipe_resource * prsc,unsigned level,const struct pipe_box * box,const void * data)934 fd6_clear_texture(struct pipe_context *pctx, struct pipe_resource *prsc,
935 unsigned level, const struct pipe_box *box, const void *data)
936 assert_dt
937 {
938 struct fd_context *ctx = fd_context(pctx);
939 struct fd_resource *rsc = fd_resource(prsc);
940
941 if (DEBUG_BLIT) {
942 fprintf(stderr, "surface texture:\ndst resource: ");
943 util_dump_resource(stderr, prsc);
944 fprintf(stderr, "\n");
945 }
946
947 if (!can_do_clear(prsc, level, box)) {
948 u_default_clear_texture(pctx, prsc, level, box, data);
949 return;
950 }
951
952 union pipe_color_union color;
953
954 if (util_format_is_depth_or_stencil(prsc->format)) {
955 const struct util_format_description *desc =
956 util_format_description(prsc->format);
957 float depth = 0.0f;
958 uint8_t stencil = 0;
959
960 if (util_format_has_depth(desc))
961 util_format_unpack_z_float(prsc->format, &depth, data, 1);
962
963 if (util_format_has_stencil(desc))
964 util_format_unpack_s_8uint(prsc->format, &stencil, data, 1);
965
966 if (rsc->stencil)
967 fd6_clear_texture<CHIP>(pctx, &rsc->stencil->b.b, level, box, &stencil);
968
969 color.f[0] = depth;
970 color.ui[1] = stencil;
971 } else {
972 util_format_unpack_rgba(prsc->format, color.ui, data, 1);
973 }
974
975 struct fd_batch *batch = fd_bc_alloc_batch(ctx, true);
976
977 fd_screen_lock(ctx->screen);
978 fd_batch_resource_write(batch, rsc);
979 fd_screen_unlock(ctx->screen);
980
981 assert(!batch->flushed);
982
983 /* Marking the batch as needing flush must come after the batch
984 * dependency tracking (resource_read()/resource_write()), as that
985 * can trigger a flush
986 */
987 fd_batch_needs_flush(batch);
988
989 fd_batch_update_queries(batch);
990
991 emit_setup(batch);
992
993 struct pipe_surface surf = {
994 .format = prsc->format,
995 .texture = prsc,
996 .u = {
997 .tex = {
998 .level = level,
999 .first_layer = box->z,
1000 .last_layer = box->depth + box->z - 1,
1001 },
1002 },
1003 };
1004
1005 fd6_clear_surface<CHIP>(ctx, batch->draw, &surf, box, &color, 0);
1006
1007 fd6_emit_flushes(batch->ctx, batch->draw,
1008 FD6_FLUSH_CCU_COLOR |
1009 FD6_FLUSH_CCU_DEPTH |
1010 FD6_FLUSH_CACHE |
1011 FD6_WAIT_FOR_IDLE);
1012
1013 fd_batch_flush(batch);
1014 fd_batch_reference(&batch, NULL);
1015
1016 /* Acc query state will have been dirtied by our fd_batch_update_queries, so
1017 * the ctx->batch may need to turn its queries back on.
1018 */
1019 fd_context_dirty(ctx, FD_DIRTY_QUERY);
1020 }
1021
1022 template <chip CHIP>
1023 void
fd6_resolve_tile(struct fd_batch * batch,struct fd_ringbuffer * ring,uint32_t base,struct pipe_surface * psurf,uint32_t unknown_8c01)1024 fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring,
1025 uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01)
1026 {
1027 const struct fd_gmem_stateobj *gmem = batch->gmem_state;
1028 uint64_t gmem_base = batch->ctx->screen->gmem_base + base;
1029 uint32_t gmem_pitch = gmem->bin_w * batch->framebuffer.samples *
1030 util_format_get_blocksize(psurf->format);
1031
1032 OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
1033 OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
1034 OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(psurf->width - 1) |
1035 A6XX_GRAS_2D_DST_BR_Y(psurf->height - 1));
1036
1037 OUT_REG(ring,
1038 A6XX_GRAS_2D_SRC_TL_X(0),
1039 A6XX_GRAS_2D_SRC_BR_X(psurf->width - 1),
1040 A6XX_GRAS_2D_SRC_TL_Y(0),
1041 A6XX_GRAS_2D_SRC_BR_Y(psurf->height - 1),
1042 );
1043
1044 /* Enable scissor bit, which will take into account the window scissor
1045 * which is set per-tile
1046 */
1047 emit_blit_setup<CHIP>(ring, psurf->format, true, NULL, unknown_8c01, ROTATE_0);
1048
1049 /* We shouldn't be using GMEM in the layered rendering case: */
1050 assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
1051
1052 emit_blit_dst(ring, psurf->texture, psurf->format, psurf->u.tex.level,
1053 psurf->u.tex.first_layer);
1054
1055 enum a6xx_format sfmt = fd6_color_format(psurf->format, TILE6_LINEAR);
1056 enum a3xx_msaa_samples samples = fd_msaa_samples(batch->framebuffer.samples);
1057
1058 OUT_REG(ring,
1059 SP_PS_2D_SRC_INFO(
1060 CHIP,
1061 .color_format = sfmt,
1062 .tile_mode = TILE6_2,
1063 .color_swap = WZYX,
1064 .srgb = util_format_is_srgb(psurf->format),
1065 .samples = samples,
1066 .samples_average = samples > MSAA_ONE,
1067 .unk20 = true,
1068 .unk22 = true,
1069 ),
1070 SP_PS_2D_SRC_SIZE(
1071 CHIP,
1072 .width = psurf->width,
1073 .height = psurf->height,
1074 ),
1075 SP_PS_2D_SRC(
1076 CHIP,
1077 .qword = gmem_base,
1078 ),
1079 SP_PS_2D_SRC_PITCH(
1080 CHIP,
1081 .pitch = gmem_pitch,
1082 ),
1083 );
1084
1085 /* sync GMEM writes with CACHE. */
1086 fd6_cache_inv(batch, ring);
1087
1088 /* Wait for CACHE_INVALIDATE to land */
1089 OUT_WFI5(ring);
1090
1091 OUT_PKT7(ring, CP_BLIT, 1);
1092 OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
1093
1094 OUT_WFI5(ring);
1095
1096 /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
1097 * sysmem, and we generally assume that GMEM renderpasses leave their
1098 * results in sysmem, so we need to flush manually here.
1099 */
1100 fd6_emit_flushes(batch->ctx, ring,
1101 FD6_FLUSH_CCU_COLOR | FD6_WAIT_FOR_IDLE);
1102 }
1103
1104 template void fd6_resolve_tile<A6XX>(struct fd_batch *batch, struct fd_ringbuffer *ring,
1105 uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01);
1106 template void fd6_resolve_tile<A7XX>(struct fd_batch *batch, struct fd_ringbuffer *ring,
1107 uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01);
1108
1109 template <chip CHIP>
1110 static bool
handle_rgba_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1111 handle_rgba_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
1112 assert_dt
1113 {
1114 struct fd_batch *batch;
1115
1116 assert(!(info->mask & PIPE_MASK_ZS));
1117
1118 if (!can_do_blit(info))
1119 return false;
1120
1121 struct fd_resource *src = fd_resource(info->src.resource);
1122 struct fd_resource *dst = fd_resource(info->dst.resource);
1123
1124 fd6_validate_format(ctx, src, info->src.format);
1125 fd6_validate_format(ctx, dst, info->dst.format);
1126
1127 batch = fd_bc_alloc_batch(ctx, true);
1128
1129 fd_screen_lock(ctx->screen);
1130
1131 fd_batch_resource_read(batch, src);
1132 fd_batch_resource_write(batch, dst);
1133
1134 fd_screen_unlock(ctx->screen);
1135
1136 assert(!batch->flushed);
1137
1138 /* Marking the batch as needing flush must come after the batch
1139 * dependency tracking (resource_read()/resource_write()), as that
1140 * can trigger a flush
1141 */
1142 fd_batch_needs_flush(batch);
1143
1144 fd_batch_update_queries(batch);
1145
1146 emit_setup(batch);
1147
1148 DBG_BLIT(info, batch);
1149
1150 trace_start_blit(&batch->trace, batch->draw, info->src.resource->target,
1151 info->dst.resource->target);
1152
1153 if ((info->src.resource->target == PIPE_BUFFER) &&
1154 (info->dst.resource->target == PIPE_BUFFER)) {
1155 assert(src->layout.tile_mode == TILE6_LINEAR);
1156 assert(dst->layout.tile_mode == TILE6_LINEAR);
1157 emit_blit_buffer<CHIP>(ctx, batch->draw, info);
1158 } else {
1159 /* I don't *think* we need to handle blits between buffer <-> !buffer */
1160 assert(info->src.resource->target != PIPE_BUFFER);
1161 assert(info->dst.resource->target != PIPE_BUFFER);
1162 emit_blit_texture<CHIP>(ctx, batch->draw, info);
1163 }
1164
1165 trace_end_blit(&batch->trace, batch->draw);
1166
1167 fd6_emit_flushes(batch->ctx, batch->draw,
1168 FD6_FLUSH_CCU_COLOR |
1169 FD6_FLUSH_CCU_DEPTH |
1170 FD6_FLUSH_CACHE |
1171 FD6_WAIT_FOR_IDLE);
1172
1173 fd_batch_flush(batch);
1174 fd_batch_reference(&batch, NULL);
1175
1176 /* Acc query state will have been dirtied by our fd_batch_update_queries, so
1177 * the ctx->batch may need to turn its queries back on.
1178 */
1179 fd_context_dirty(ctx, FD_DIRTY_QUERY);
1180
1181 return true;
1182 }
1183
1184 /**
1185 * Re-written z/s blits can still fail for various reasons (for example MSAA).
1186 * But we want to do the fallback blit with the re-written pipe_blit_info,
1187 * in particular as u_blitter cannot blit stencil. So handle the fallback
1188 * ourself and never "fail".
1189 */
1190 template <chip CHIP>
1191 static bool
do_rewritten_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1192 do_rewritten_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
1193 assert_dt
1194 {
1195 bool success = handle_rgba_blit<CHIP>(ctx, info);
1196 if (!success) {
1197 success = fd_blitter_blit(ctx, info);
1198 }
1199 assert(success); /* fallback should never fail! */
1200 return success;
1201 }
1202
1203 /**
1204 * Handle depth/stencil blits either via u_blitter and/or re-writing the
1205 * blit into an equivilant format that we can handle
1206 */
1207 template <chip CHIP>
1208 static bool
handle_zs_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1209 handle_zs_blit(struct fd_context *ctx,
1210 const struct pipe_blit_info *info) assert_dt
1211 {
1212 struct pipe_blit_info blit = *info;
1213
1214 if (DEBUG_BLIT) {
1215 fprintf(stderr, "---- handle_zs_blit: ");
1216 dump_blit_info(info);
1217 }
1218
1219 if (info->src.format != info->dst.format)
1220 return false;
1221
1222 struct fd_resource *src = fd_resource(info->src.resource);
1223 struct fd_resource *dst = fd_resource(info->dst.resource);
1224
1225 switch (info->dst.format) {
1226 case PIPE_FORMAT_S8_UINT:
1227 assert(info->mask == PIPE_MASK_S);
1228 blit.mask = PIPE_MASK_R;
1229 blit.src.format = PIPE_FORMAT_R8_UINT;
1230 blit.dst.format = PIPE_FORMAT_R8_UINT;
1231 blit.sample0_only = true;
1232 return do_rewritten_blit<CHIP>(ctx, &blit);
1233
1234 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1235 if (info->mask & PIPE_MASK_Z) {
1236 blit.mask = PIPE_MASK_R;
1237 blit.src.format = PIPE_FORMAT_R32_FLOAT;
1238 blit.dst.format = PIPE_FORMAT_R32_FLOAT;
1239 blit.sample0_only = true;
1240 do_rewritten_blit<CHIP>(ctx, &blit);
1241 }
1242
1243 if (info->mask & PIPE_MASK_S) {
1244 blit.mask = PIPE_MASK_R;
1245 blit.src.format = PIPE_FORMAT_R8_UINT;
1246 blit.dst.format = PIPE_FORMAT_R8_UINT;
1247 blit.src.resource = &src->stencil->b.b;
1248 blit.dst.resource = &dst->stencil->b.b;
1249 blit.sample0_only = true;
1250 do_rewritten_blit<CHIP>(ctx, &blit);
1251 }
1252
1253 return true;
1254
1255 case PIPE_FORMAT_Z16_UNORM:
1256 blit.mask = PIPE_MASK_R;
1257 blit.src.format = PIPE_FORMAT_R16_UNORM;
1258 blit.dst.format = PIPE_FORMAT_R16_UNORM;
1259 blit.sample0_only = true;
1260 return do_rewritten_blit<CHIP>(ctx, &blit);
1261
1262 case PIPE_FORMAT_Z32_UNORM:
1263 case PIPE_FORMAT_Z32_FLOAT:
1264 assert(info->mask == PIPE_MASK_Z);
1265 blit.mask = PIPE_MASK_R;
1266 blit.src.format = PIPE_FORMAT_R32_UINT;
1267 blit.dst.format = PIPE_FORMAT_R32_UINT;
1268 blit.sample0_only = true;
1269 return do_rewritten_blit<CHIP>(ctx, &blit);
1270
1271 case PIPE_FORMAT_Z24X8_UNORM:
1272 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1273 blit.mask = 0;
1274 if (info->mask & PIPE_MASK_Z)
1275 blit.mask |= PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B;
1276 if (info->mask & PIPE_MASK_S)
1277 blit.mask |= PIPE_MASK_A;
1278 blit.src.format = PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
1279 blit.dst.format = PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
1280 /* non-UBWC Z24_UNORM_S8_UINT_AS_R8G8B8A8 is broken on a630, fall back to
1281 * 8888_unorm.
1282 */
1283 if (!ctx->screen->info->a6xx.has_z24uint_s8uint) {
1284 if (!src->layout.ubwc && !dst->layout.ubwc) {
1285 blit.src.format = PIPE_FORMAT_RGBA8888_UINT;
1286 blit.dst.format = PIPE_FORMAT_RGBA8888_UINT;
1287 } else {
1288 if (!src->layout.ubwc)
1289 blit.src.format = PIPE_FORMAT_RGBA8888_UNORM;
1290 if (!dst->layout.ubwc)
1291 blit.dst.format = PIPE_FORMAT_RGBA8888_UNORM;
1292 }
1293 }
1294 if (info->src.resource->nr_samples > 1 && blit.src.format != PIPE_FORMAT_RGBA8888_UINT)
1295 blit.sample0_only = true;
1296 return fd_blitter_blit(ctx, &blit);
1297
1298 default:
1299 return false;
1300 }
1301 }
1302
1303 template <chip CHIP>
1304 static bool
handle_compressed_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1305 handle_compressed_blit(struct fd_context *ctx,
1306 const struct pipe_blit_info *info) assert_dt
1307 {
1308 struct pipe_blit_info blit = *info;
1309
1310 if (DEBUG_BLIT) {
1311 fprintf(stderr, "---- handle_compressed_blit: ");
1312 dump_blit_info(info);
1313 }
1314
1315 if (info->src.format != info->dst.format)
1316 return fd_blitter_blit(ctx, info);
1317
1318 if (util_format_get_blocksize(info->src.format) == 8) {
1319 blit.src.format = blit.dst.format = PIPE_FORMAT_R16G16B16A16_UINT;
1320 } else {
1321 assert(util_format_get_blocksize(info->src.format) == 16);
1322 blit.src.format = blit.dst.format = PIPE_FORMAT_R32G32B32A32_UINT;
1323 }
1324
1325 int bw = util_format_get_blockwidth(info->src.format);
1326 int bh = util_format_get_blockheight(info->src.format);
1327
1328 /* NOTE: x/y *must* be aligned to block boundary (ie. in
1329 * glCompressedTexSubImage2D()) but width/height may not
1330 * be:
1331 */
1332
1333 assert((blit.src.box.x % bw) == 0);
1334 assert((blit.src.box.y % bh) == 0);
1335
1336 blit.src.box.x /= bw;
1337 blit.src.box.y /= bh;
1338 blit.src.box.width = DIV_ROUND_UP(blit.src.box.width, bw);
1339 blit.src.box.height = DIV_ROUND_UP(blit.src.box.height, bh);
1340
1341 assert((blit.dst.box.x % bw) == 0);
1342 assert((blit.dst.box.y % bh) == 0);
1343
1344 blit.dst.box.x /= bw;
1345 blit.dst.box.y /= bh;
1346 blit.dst.box.width = DIV_ROUND_UP(blit.dst.box.width, bw);
1347 blit.dst.box.height = DIV_ROUND_UP(blit.dst.box.height, bh);
1348
1349 return do_rewritten_blit<CHIP>(ctx, &blit);
1350 }
1351
1352 /**
1353 * For SNORM formats, copy them as the equivalent UNORM format. If we treat
1354 * them as snorm then the 0x80 (-1.0 snorm8) value will get clamped to 0x81
1355 * (also -1.0), when we're supposed to be memcpying the bits. See
1356 * https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/2917 for discussion.
1357 */
1358 template <chip CHIP>
1359 static bool
handle_snorm_copy_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1360 handle_snorm_copy_blit(struct fd_context *ctx,
1361 const struct pipe_blit_info *info)
1362 assert_dt
1363 {
1364 /* If we're interpolating the pixels, we can't just treat the values as unorm. */
1365 if (info->filter == PIPE_TEX_FILTER_LINEAR)
1366 return false;
1367
1368 struct pipe_blit_info blit = *info;
1369
1370 blit.src.format = blit.dst.format = util_format_snorm_to_unorm(info->src.format);
1371
1372 return do_rewritten_blit<CHIP>(ctx, &blit);
1373 }
1374
1375 template <chip CHIP>
1376 static bool
fd6_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1377 fd6_blit(struct fd_context *ctx, const struct pipe_blit_info *info) assert_dt
1378 {
1379 if (info->mask & PIPE_MASK_ZS)
1380 return handle_zs_blit<CHIP>(ctx, info);
1381
1382 if (util_format_is_compressed(info->src.format) ||
1383 util_format_is_compressed(info->dst.format))
1384 return handle_compressed_blit<CHIP>(ctx, info);
1385
1386 if ((info->src.format == info->dst.format) &&
1387 util_format_is_snorm(info->src.format))
1388 return handle_snorm_copy_blit<CHIP>(ctx, info);
1389
1390 return handle_rgba_blit<CHIP>(ctx, info);
1391 }
1392
1393 template <chip CHIP>
1394 void
fd6_blitter_init(struct pipe_context * pctx)1395 fd6_blitter_init(struct pipe_context *pctx)
1396 disable_thread_safety_analysis
1397 {
1398 struct fd_context *ctx = fd_context(pctx);
1399
1400 ctx->clear_ubwc = fd6_clear_ubwc<CHIP>;
1401 ctx->validate_format = fd6_validate_format;
1402
1403 if (FD_DBG(NOBLIT))
1404 return;
1405
1406 pctx->clear_texture = fd6_clear_texture<CHIP>;
1407 ctx->blit = fd6_blit<CHIP>;
1408 }
1409
1410 /* Teach the compiler about needed variants: */
1411 template void fd6_blitter_init<A6XX>(struct pipe_context *pctx);
1412 template void fd6_blitter_init<A7XX>(struct pipe_context *pctx);
1413
1414 unsigned
fd6_tile_mode_for_format(enum pipe_format pfmt)1415 fd6_tile_mode_for_format(enum pipe_format pfmt)
1416 {
1417 /* basically just has to be a format we can blit, so uploads/downloads
1418 * via linear staging buffer works:
1419 */
1420 if (ok_format(pfmt))
1421 return TILE6_3;
1422
1423 return TILE6_LINEAR;
1424 }
1425 unsigned
fd6_tile_mode(const struct pipe_resource * tmpl)1426 fd6_tile_mode(const struct pipe_resource *tmpl)
1427 {
1428 /* if the mipmap level 0 is still too small to be tiled, then don't
1429 * bother pretending:
1430 */
1431 if ((tmpl->width0 < FDL_MIN_UBWC_WIDTH) &&
1432 !util_format_is_depth_or_stencil(tmpl->format))
1433 return TILE6_LINEAR;
1434
1435 return fd6_tile_mode_for_format(tmpl->format);
1436 }
1437