1 /*
2 * Copyright © 2017 Rob Clark <robclark@freedesktop.org>
3 * Copyright © 2018 Google, Inc.
4 * SPDX-License-Identifier: MIT
5 *
6 * Authors:
7 * Rob Clark <robclark@freedesktop.org>
8 */
9
10 #define FD_BO_NO_HARDPIN 1
11
12 #include "util/format_srgb.h"
13 #include "util/half_float.h"
14 #include "util/u_dump.h"
15 #include "util/u_helpers.h"
16 #include "util/u_log.h"
17 #include "util/u_transfer.h"
18 #include "util/u_surface.h"
19
20 #include "freedreno_blitter.h"
21 #include "freedreno_fence.h"
22 #include "freedreno_resource.h"
23 #include "freedreno_tracepoints.h"
24
25 #include "fd6_barrier.h"
26 #include "fd6_blitter.h"
27 #include "fd6_emit.h"
28 #include "fd6_pack.h"
29 #include "fd6_resource.h"
30
31 static inline enum a6xx_2d_ifmt
fd6_ifmt(enum a6xx_format fmt)32 fd6_ifmt(enum a6xx_format fmt)
33 {
34 switch (fmt) {
35 case FMT6_A8_UNORM:
36 case FMT6_8_UNORM:
37 case FMT6_8_SNORM:
38 case FMT6_8_8_UNORM:
39 case FMT6_8_8_SNORM:
40 case FMT6_8_8_8_8_UNORM:
41 case FMT6_8_8_8_X8_UNORM:
42 case FMT6_8_8_8_8_SNORM:
43 case FMT6_4_4_4_4_UNORM:
44 case FMT6_5_5_5_1_UNORM:
45 case FMT6_5_6_5_UNORM:
46 return R2D_UNORM8;
47
48 case FMT6_32_UINT:
49 case FMT6_32_SINT:
50 case FMT6_32_32_UINT:
51 case FMT6_32_32_SINT:
52 case FMT6_32_32_32_32_UINT:
53 case FMT6_32_32_32_32_SINT:
54 return R2D_INT32;
55
56 case FMT6_16_UINT:
57 case FMT6_16_SINT:
58 case FMT6_16_16_UINT:
59 case FMT6_16_16_SINT:
60 case FMT6_16_16_16_16_UINT:
61 case FMT6_16_16_16_16_SINT:
62 case FMT6_10_10_10_2_UINT:
63 return R2D_INT16;
64
65 case FMT6_8_UINT:
66 case FMT6_8_SINT:
67 case FMT6_8_8_UINT:
68 case FMT6_8_8_SINT:
69 case FMT6_8_8_8_8_UINT:
70 case FMT6_8_8_8_8_SINT:
71 case FMT6_Z24_UNORM_S8_UINT:
72 case FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
73 return R2D_INT8;
74
75 case FMT6_16_UNORM:
76 case FMT6_16_SNORM:
77 case FMT6_16_16_UNORM:
78 case FMT6_16_16_SNORM:
79 case FMT6_16_16_16_16_UNORM:
80 case FMT6_16_16_16_16_SNORM:
81 case FMT6_32_FLOAT:
82 case FMT6_32_32_FLOAT:
83 case FMT6_32_32_32_32_FLOAT:
84 return R2D_FLOAT32;
85
86 case FMT6_16_FLOAT:
87 case FMT6_16_16_FLOAT:
88 case FMT6_16_16_16_16_FLOAT:
89 case FMT6_11_11_10_FLOAT:
90 case FMT6_10_10_10_2_UNORM_DEST:
91 return R2D_FLOAT16;
92
93 default:
94 unreachable("bad format");
95 return (enum a6xx_2d_ifmt)0;
96 }
97 }
98
99 /* Make sure none of the requested dimensions extend beyond the size of the
100 * resource. Not entirely sure why this happens, but sometimes it does, and
101 * w/ 2d blt doesn't have wrap modes like a sampler, so force those cases
102 * back to u_blitter
103 */
104 static bool
ok_dims(const struct pipe_resource * r,const struct pipe_box * b,int lvl)105 ok_dims(const struct pipe_resource *r, const struct pipe_box *b, int lvl)
106 {
107 int last_layer =
108 r->target == PIPE_TEXTURE_3D ? u_minify(r->depth0, lvl) : r->array_size;
109
110 return (b->x >= 0) && (b->x + b->width <= u_minify(r->width0, lvl)) &&
111 (b->y >= 0) && (b->y + b->height <= u_minify(r->height0, lvl)) &&
112 (b->z >= 0) && (b->z + b->depth <= last_layer);
113 }
114
115 static bool
ok_format(enum pipe_format pfmt)116 ok_format(enum pipe_format pfmt)
117 {
118 enum a6xx_format fmt = fd6_color_format(pfmt, TILE6_LINEAR);
119
120 if (util_format_is_compressed(pfmt))
121 return true;
122
123 switch (pfmt) {
124 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
125 case PIPE_FORMAT_Z24X8_UNORM:
126 case PIPE_FORMAT_Z16_UNORM:
127 case PIPE_FORMAT_Z32_UNORM:
128 case PIPE_FORMAT_Z32_FLOAT:
129 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
130 case PIPE_FORMAT_S8_UINT:
131 return true;
132 default:
133 break;
134 }
135
136 if (fmt == FMT6_NONE)
137 return false;
138
139 return true;
140 }
141
/* Compile-time debug switches: DEBUG_BLIT dumps the operations emitted by
 * this file to stderr, DEBUG_BLIT_FALLBACK dumps the reason whenever
 * fail_if() rejects a blit.
 */
#define DEBUG_BLIT          0
#define DEBUG_BLIT_FALLBACK 0

/* Return false from the enclosing function when 'cond' is true.  A
 * 'const struct pipe_blit_info *info' must be in scope at the point of use,
 * since it is dumped when DEBUG_BLIT_FALLBACK is enabled.
 */
#define fail_if(cond)                                                        \
   do {                                                                      \
      if (!(cond))                                                           \
         break;                                                              \
      if (DEBUG_BLIT_FALLBACK) {                                             \
         fprintf(stderr, "falling back: %s for blit:\n", #cond);             \
         dump_blit_info(info);                                               \
      }                                                                      \
      return false;                                                          \
   } while (0)
155
156 static bool
is_ubwc(struct pipe_resource * prsc,unsigned level)157 is_ubwc(struct pipe_resource *prsc, unsigned level)
158 {
159 return fd_resource_ubwc_enabled(fd_resource(prsc), level);
160 }
161
162 static void
dump_blit_info(const struct pipe_blit_info * info)163 dump_blit_info(const struct pipe_blit_info *info)
164 {
165 util_dump_blit_info(stderr, info);
166 fprintf(stderr, "\n\tdst resource: ");
167 util_dump_resource(stderr, info->dst.resource);
168 if (is_ubwc(info->dst.resource, info->dst.level))
169 fprintf(stderr, " (ubwc)");
170 fprintf(stderr, "\n\tsrc resource: ");
171 util_dump_resource(stderr, info->src.resource);
172 if (is_ubwc(info->src.resource, info->src.level))
173 fprintf(stderr, " (ubwc)");
174 fprintf(stderr, "\n\n");
175 }
176
177 static bool
can_do_blit(const struct pipe_blit_info * info)178 can_do_blit(const struct pipe_blit_info *info)
179 {
180 /* I think we can do scaling, but not in z dimension since that would
181 * require blending..
182 */
183 fail_if(info->dst.box.depth != info->src.box.depth);
184
185 /* Fail if unsupported format: */
186 fail_if(!ok_format(info->src.format));
187 fail_if(!ok_format(info->dst.format));
188
189 assert(!util_format_is_compressed(info->src.format));
190 assert(!util_format_is_compressed(info->dst.format));
191
192 fail_if(!ok_dims(info->src.resource, &info->src.box, info->src.level));
193
194 /* We _shouldn't_ be getting negative dst coords, but do as a result of
195 * y-flip in do_blit_framebuffer(). See
196 * dEQP-GLES31.functional.primitive_bounding_box.blit_fbo.blit_fbo_to_default
197 */
198 fail_if(info->dst.box.x < 0);
199 fail_if(info->dst.box.y < 0);
200
201 assert(info->dst.box.width >= 0);
202 assert(info->dst.box.height >= 0);
203 assert(info->dst.box.depth >= 0);
204
205 fail_if(info->dst.resource->nr_samples > 1);
206 fail_if(info->src.resource->nr_samples > 1);
207
208 fail_if(info->window_rectangle_include);
209
210 /* The blitter can't handle the needed swizzle gymnastics to convert
211 * to/from L/A formats:
212 */
213 fail_if(info->swizzle_enable);
214 if (info->src.format != info->dst.format) {
215 fail_if(util_format_is_luminance(info->dst.format));
216 fail_if(util_format_is_alpha(info->dst.format));
217 fail_if(util_format_is_luminance_alpha(info->dst.format));
218 fail_if(util_format_is_luminance(info->src.format));
219 fail_if(util_format_is_alpha(info->src.format));
220 fail_if(util_format_is_luminance_alpha(info->src.format));
221 }
222
223 const struct util_format_description *src_desc =
224 util_format_description(info->src.format);
225 const struct util_format_description *dst_desc =
226 util_format_description(info->dst.format);
227 const int common_channels =
228 MIN2(src_desc->nr_channels, dst_desc->nr_channels);
229
230 if (info->mask & PIPE_MASK_RGBA) {
231 for (int i = 0; i < common_channels; i++) {
232 fail_if(memcmp(&src_desc->channel[i], &dst_desc->channel[i],
233 sizeof(src_desc->channel[0])));
234 }
235 }
236
237 fail_if(info->alpha_blend);
238
239 return true;
240 }
241
242 static bool
can_do_clear(const struct pipe_resource * prsc,unsigned level,const struct pipe_box * box)243 can_do_clear(const struct pipe_resource *prsc, unsigned level,
244 const struct pipe_box *box)
245 {
246 return ok_format(prsc->format) &&
247 ok_dims(prsc, box, level) &&
248 (fd_resource_nr_samples(prsc) == 1);
249
250 return true;
251 }
252
253 template <chip CHIP>
254 static void
emit_setup(struct fd_batch * batch)255 emit_setup(struct fd_batch *batch)
256 {
257 struct fd_ringbuffer *ring = batch->draw;
258 struct fd_screen *screen = batch->ctx->screen;
259
260 fd6_emit_flushes<CHIP>(batch->ctx, ring,
261 FD6_FLUSH_CCU_COLOR |
262 FD6_INVALIDATE_CCU_COLOR |
263 FD6_FLUSH_CCU_DEPTH |
264 FD6_INVALIDATE_CCU_DEPTH);
265
266 /* normal BLIT_OP_SCALE operation needs bypass RB_CCU_CNTL */
267 fd6_emit_ccu_cntl<CHIP>(ring, screen, false);
268 }
269
270 template <chip CHIP>
271 static void
emit_blit_fini(struct fd_context * ctx,struct fd_ringbuffer * ring)272 emit_blit_fini(struct fd_context *ctx, struct fd_ringbuffer *ring)
273 {
274 fd6_event_write<CHIP>(ctx, ring, FD_LABEL);
275 OUT_WFI5(ring);
276
277 OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
278 OUT_RING(ring, ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL_blit);
279
280 OUT_PKT7(ring, CP_BLIT, 1);
281 OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
282
283 OUT_WFI5(ring);
284
285 OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
286 OUT_RING(ring, 0); /* RB_DBG_ECO_CNTL */
287 }
288 FD_GENX(emit_blit_fini);
289
290 template <chip CHIP>
291 static void
emit_blit_setup(struct fd_ringbuffer * ring,enum pipe_format pfmt,bool scissor_enable,union pipe_color_union * color,uint32_t unknown_8c01,enum a6xx_rotation rotate)292 emit_blit_setup(struct fd_ringbuffer *ring, enum pipe_format pfmt,
293 bool scissor_enable, union pipe_color_union *color,
294 uint32_t unknown_8c01, enum a6xx_rotation rotate)
295 {
296 enum a6xx_format fmt = fd6_color_format(pfmt, TILE6_LINEAR);
297 bool is_srgb = util_format_is_srgb(pfmt);
298 enum a6xx_2d_ifmt ifmt = fd6_ifmt(fmt);
299
300 if (is_srgb) {
301 assert(ifmt == R2D_UNORM8);
302 ifmt = R2D_UNORM8_SRGB;
303 }
304
305 uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL_MASK(0xf) |
306 A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt) |
307 A6XX_RB_2D_BLIT_CNTL_IFMT(ifmt) |
308 A6XX_RB_2D_BLIT_CNTL_ROTATE(rotate) |
309 COND(color, A6XX_RB_2D_BLIT_CNTL_SOLID_COLOR) |
310 COND(scissor_enable, A6XX_RB_2D_BLIT_CNTL_SCISSOR);
311
312 OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1);
313 OUT_RING(ring, blit_cntl);
314
315 OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
316 OUT_RING(ring, blit_cntl);
317
318 if (CHIP >= A7XX) {
319 OUT_REG(ring, A7XX_TPL1_2D_SRC_CNTL(
320 .raw_copy = false,
321 .start_offset_texels = 0,
322 .type = A6XX_TEX_2D,
323 ));
324 }
325
326 if (fmt == FMT6_10_10_10_2_UNORM_DEST)
327 fmt = FMT6_16_16_16_16_FLOAT;
328
329 /* This register is probably badly named... it seems that it's
330 * controlling the internal/accumulator format or something like
331 * that. It's certainly not tied to only the src format.
332 */
333 OUT_REG(ring, SP_2D_DST_FORMAT(
334 CHIP,
335 .sint = util_format_is_pure_sint(pfmt),
336 .uint = util_format_is_pure_uint(pfmt),
337 .color_format = fmt,
338 .srgb = is_srgb,
339 .mask = 0xf,
340 ));
341
342 OUT_PKT4(ring, REG_A6XX_RB_2D_UNKNOWN_8C01, 1);
343 OUT_RING(ring, unknown_8c01);
344 }
345
346 static void
emit_blit_buffer_dst(struct fd_ringbuffer * ring,struct fd_resource * dst,unsigned off,unsigned size,a6xx_format color_format)347 emit_blit_buffer_dst(struct fd_ringbuffer *ring, struct fd_resource *dst,
348 unsigned off, unsigned size, a6xx_format color_format)
349 {
350 OUT_REG(ring,
351 A6XX_RB_2D_DST_INFO(
352 .color_format = color_format,
353 .tile_mode = TILE6_LINEAR,
354 .color_swap = WZYX,
355 ),
356 A6XX_RB_2D_DST(
357 .bo = dst->bo,
358 .bo_offset = off,
359 ),
360 A6XX_RB_2D_DST_PITCH(size),
361 );
362 }
363
364 /* buffers need to be handled specially since x/width can exceed the bounds
365 * supported by hw.. if necessary decompose into (potentially) two 2D blits
366 */
367 template <chip CHIP>
368 static void
emit_blit_buffer(struct fd_context * ctx,struct fd_ringbuffer * ring,const struct pipe_blit_info * info)369 emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring,
370 const struct pipe_blit_info *info)
371 {
372 const struct pipe_box *sbox = &info->src.box;
373 const struct pipe_box *dbox = &info->dst.box;
374 struct fd_resource *src, *dst;
375 unsigned sshift, dshift;
376
377 if (DEBUG_BLIT) {
378 fprintf(stderr, "buffer blit: ");
379 dump_blit_info(info);
380 }
381
382 src = fd_resource(info->src.resource);
383 dst = fd_resource(info->dst.resource);
384
385 assert(src->layout.cpp == 1);
386 assert(dst->layout.cpp == 1);
387 assert(info->src.resource->format == info->dst.resource->format);
388 assert((sbox->y == 0) && (sbox->height == 1));
389 assert((dbox->y == 0) && (dbox->height == 1));
390 assert((sbox->z == 0) && (sbox->depth == 1));
391 assert((dbox->z == 0) && (dbox->depth == 1));
392 assert(sbox->width == dbox->width);
393 assert(info->src.level == 0);
394 assert(info->dst.level == 0);
395
396 /*
397 * Buffers can have dimensions bigger than max width, remap into
398 * multiple 1d blits to fit within max dimension
399 *
400 * Note that blob uses .ARRAY_PITCH=128 for blitting buffers, which
401 * seems to prevent overfetch related faults. Not quite sure what
402 * the deal is there.
403 *
404 * Low 6 bits of SRC/DST addresses need to be zero (ie. address
405 * aligned to 64) so we need to shift src/dst x1/x2 to make up the
406 * difference. On top of already splitting up the blit so width
407 * isn't > 16k.
408 *
409 * We perhaps could do a bit better, if src and dst are aligned but
410 * in the worst case this means we have to split the copy up into
411 * 16k (0x4000) minus 64 (0x40).
412 */
413
414 sshift = sbox->x & 0x3f;
415 dshift = dbox->x & 0x3f;
416
417 emit_blit_setup<CHIP>(ring, PIPE_FORMAT_R8_UNORM, false, NULL, 0, ROTATE_0);
418
419 for (unsigned off = 0; off < sbox->width; off += (0x4000 - 0x40)) {
420 unsigned soff, doff, w, p;
421
422 soff = (sbox->x + off) & ~0x3f;
423 doff = (dbox->x + off) & ~0x3f;
424
425 w = MIN2(sbox->width - off, (0x4000 - 0x40));
426 p = align(w, 64);
427
428 assert((soff + w) <= fd_bo_size(src->bo));
429 assert((doff + w) <= fd_bo_size(dst->bo));
430
431 /*
432 * Emit source:
433 */
434 OUT_REG(ring,
435 SP_PS_2D_SRC_INFO(
436 CHIP,
437 .color_format = FMT6_8_UNORM,
438 .tile_mode = TILE6_LINEAR,
439 .color_swap = WZYX,
440 .unk20 = true,
441 .unk22 = true,
442 ),
443 SP_PS_2D_SRC_SIZE(
444 CHIP,
445 .width = sshift + w,
446 .height = 1,
447 ),
448 SP_PS_2D_SRC(
449 CHIP,
450 .bo = src->bo,
451 .bo_offset = soff,
452 ),
453 SP_PS_2D_SRC_PITCH(
454 CHIP,
455 .pitch = p,
456 ),
457 );
458
459 /*
460 * Emit destination:
461 */
462 emit_blit_buffer_dst(ring, dst, doff, p, FMT6_8_UNORM);
463
464 /*
465 * Blit command:
466 */
467 OUT_REG(ring,
468 A6XX_GRAS_2D_SRC_TL_X(sshift),
469 A6XX_GRAS_2D_SRC_BR_X(sshift + w - 1),
470 A6XX_GRAS_2D_SRC_TL_Y(0),
471 A6XX_GRAS_2D_SRC_BR_Y(0),
472 );
473
474 OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
475 OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(dshift) | A6XX_GRAS_2D_DST_TL_Y(0));
476 OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(dshift + w - 1) |
477 A6XX_GRAS_2D_DST_BR_Y(0));
478
479 emit_blit_fini<CHIP>(ctx, ring);
480 }
481 }
482
483 template <chip CHIP>
484 static void
fd6_clear_ubwc(struct fd_batch * batch,struct fd_resource * rsc)485 fd6_clear_ubwc(struct fd_batch *batch, struct fd_resource *rsc) assert_dt
486 {
487 struct fd_ringbuffer *ring = fd_batch_get_prologue(batch);
488 union pipe_color_union color = {};
489
490 emit_blit_setup<CHIP>(ring, PIPE_FORMAT_R8_UNORM, false, &color, 0, ROTATE_0);
491
492 OUT_REG(ring,
493 SP_PS_2D_SRC_INFO(CHIP),
494 SP_PS_2D_SRC_SIZE(CHIP),
495 SP_PS_2D_SRC(CHIP),
496 SP_PS_2D_SRC_PITCH(CHIP),
497 );
498
499 OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
500 OUT_RING(ring, 0x00000000);
501 OUT_RING(ring, 0x00000000);
502 OUT_RING(ring, 0x00000000);
503 OUT_RING(ring, 0x00000000);
504
505 OUT_REG(ring,
506 A6XX_GRAS_2D_SRC_TL_X(0),
507 A6XX_GRAS_2D_SRC_BR_X(0),
508 A6XX_GRAS_2D_SRC_TL_Y(0),
509 A6XX_GRAS_2D_SRC_BR_Y(0),
510 );
511
512 unsigned size = rsc->layout.slices[0].offset;
513 unsigned offset = 0;
514
515 /* We could be more clever here and realize that we could use a
516 * larger width if the size is aligned to something more than a
517 * single page.. or even use a format larger than r8 in those
518 * cases. But for normal sized textures and even up to 16k x 16k
519 * at <= 4byte/pixel, we'll only go thru the loop once
520 */
521 const unsigned w = 0x1000;
522
523 /* ubwc size should always be page aligned: */
524 assert((size % w) == 0);
525
526 while (size > 0) {
527 const unsigned h = MIN2(0x4000, size / w);
528 /* width is already aligned to a suitable pitch: */
529 const unsigned p = w;
530
531 /*
532 * Emit destination:
533 */
534 emit_blit_buffer_dst(ring, rsc, offset, p, FMT6_8_UNORM);
535
536 /*
537 * Blit command:
538 */
539
540 OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
541 OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
542 OUT_RING(ring,
543 A6XX_GRAS_2D_DST_BR_X(w - 1) | A6XX_GRAS_2D_DST_BR_Y(h - 1));
544
545 emit_blit_fini<CHIP>(batch->ctx, ring);
546 offset += w * h;
547 size -= w * h;
548 }
549
550 fd6_emit_flushes<CHIP>(batch->ctx, ring,
551 FD6_FLUSH_CCU_COLOR |
552 FD6_FLUSH_CCU_DEPTH |
553 FD6_FLUSH_CACHE |
554 FD6_WAIT_FOR_IDLE);
555 }
556
557 static void
emit_blit_dst(struct fd_ringbuffer * ring,struct pipe_resource * prsc,enum pipe_format pfmt,unsigned level,unsigned layer)558 emit_blit_dst(struct fd_ringbuffer *ring, struct pipe_resource *prsc,
559 enum pipe_format pfmt, unsigned level, unsigned layer)
560 {
561 struct fd_resource *dst = fd_resource(prsc);
562 enum a6xx_format fmt =
563 fd6_color_format(pfmt, (enum a6xx_tile_mode)dst->layout.tile_mode);
564 enum a6xx_tile_mode tile =
565 (enum a6xx_tile_mode)fd_resource_tile_mode(prsc, level);
566 enum a3xx_color_swap swap =
567 fd6_color_swap(pfmt, (enum a6xx_tile_mode)dst->layout.tile_mode,
568 false);
569 uint32_t pitch = fd_resource_pitch(dst, level);
570 bool ubwc_enabled = fd_resource_ubwc_enabled(dst, level);
571 unsigned off = fd_resource_offset(dst, level, layer);
572
573 if (fmt == FMT6_Z24_UNORM_S8_UINT)
574 fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
575
576 OUT_REG(ring,
577 A6XX_RB_2D_DST_INFO(
578 .color_format = fmt,
579 .tile_mode = tile,
580 .color_swap = swap,
581 .flags = ubwc_enabled,
582 .srgb = util_format_is_srgb(pfmt),
583 ),
584 A6XX_RB_2D_DST(
585 .bo = dst->bo,
586 .bo_offset = off,
587 ),
588 A6XX_RB_2D_DST_PITCH(pitch),
589 );
590
591 if (ubwc_enabled) {
592 OUT_PKT4(ring, REG_A6XX_RB_2D_DST_FLAGS, 6);
593 fd6_emit_flag_reference(ring, dst, level, layer);
594 OUT_RING(ring, 0x00000000);
595 OUT_RING(ring, 0x00000000);
596 OUT_RING(ring, 0x00000000);
597 }
598 }
599
600 template <chip CHIP>
601 static void
emit_blit_src(struct fd_ringbuffer * ring,const struct pipe_blit_info * info,unsigned layer,unsigned nr_samples)602 emit_blit_src(struct fd_ringbuffer *ring, const struct pipe_blit_info *info,
603 unsigned layer, unsigned nr_samples)
604 {
605 struct fd_resource *src = fd_resource(info->src.resource);
606 enum a6xx_format sfmt =
607 fd6_texture_format(info->src.format, (enum a6xx_tile_mode)src->layout.tile_mode, false);
608 enum a6xx_tile_mode stile =
609 (enum a6xx_tile_mode)fd_resource_tile_mode(info->src.resource, info->src.level);
610 enum a3xx_color_swap sswap =
611 fd6_texture_swap(info->src.format, (enum a6xx_tile_mode)src->layout.tile_mode, false);
612 uint32_t pitch = fd_resource_pitch(src, info->src.level);
613 bool subwc_enabled = fd_resource_ubwc_enabled(src, info->src.level);
614 unsigned soff = fd_resource_offset(src, info->src.level, layer);
615 uint32_t width = u_minify(src->b.b.width0, info->src.level) * nr_samples;
616 uint32_t height = u_minify(src->b.b.height0, info->src.level);
617 enum a3xx_msaa_samples samples = fd_msaa_samples(src->b.b.nr_samples);
618
619 if (info->src.format == PIPE_FORMAT_A8_UNORM)
620 sfmt = FMT6_A8_UNORM;
621
622 OUT_REG(ring,
623 SP_PS_2D_SRC_INFO(
624 CHIP,
625 .color_format = sfmt,
626 .tile_mode = stile,
627 .color_swap = sswap,
628 .flags = subwc_enabled,
629 .srgb = util_format_is_srgb(info->src.format),
630 .samples = samples,
631 .filter = (info->filter == PIPE_TEX_FILTER_LINEAR),
632 .samples_average = (samples > MSAA_ONE) && !info->sample0_only,
633 .unk20 = true,
634 .unk22 = true,
635 ),
636 SP_PS_2D_SRC_SIZE(
637 CHIP,
638 .width = width,
639 .height = height,
640 ),
641 SP_PS_2D_SRC(
642 CHIP,
643 .bo = src->bo,
644 .bo_offset = soff,
645 ),
646 SP_PS_2D_SRC_PITCH(
647 CHIP,
648 .pitch = pitch,
649 ),
650 );
651
652 if (subwc_enabled && fd_resource_ubwc_enabled(src, info->src.level)) {
653 OUT_REG(ring,
654 SP_PS_2D_SRC_FLAGS(
655 CHIP,
656 .bo = src->bo,
657 .bo_offset = fd_resource_ubwc_offset(src, info->src.level, layer),
658 ),
659 SP_PS_2D_SRC_FLAGS_PITCH(
660 CHIP, fdl_ubwc_pitch(&src->layout, info->src.level)),
661 );
662 }
663 }
664
665 template <chip CHIP>
666 static void
emit_blit_texture(struct fd_context * ctx,struct fd_ringbuffer * ring,const struct pipe_blit_info * info)667 emit_blit_texture(struct fd_context *ctx, struct fd_ringbuffer *ring,
668 const struct pipe_blit_info *info)
669 {
670 const struct pipe_box *sbox = &info->src.box;
671 const struct pipe_box *dbox = &info->dst.box;
672 struct fd_resource *dst;
673 int sx1, sy1, sx2, sy2;
674 int dx1, dy1, dx2, dy2;
675
676 if (DEBUG_BLIT) {
677 fprintf(stderr, "texture blit: ");
678 dump_blit_info(info);
679 }
680
681 dst = fd_resource(info->dst.resource);
682
683 uint32_t nr_samples = fd_resource_nr_samples(&dst->b.b);
684
685 sx1 = sbox->x * nr_samples;
686 sy1 = sbox->y;
687 sx2 = (sbox->x + sbox->width) * nr_samples;
688 sy2 = sbox->y + sbox->height;
689
690 dx1 = dbox->x * nr_samples;
691 dy1 = dbox->y;
692 dx2 = (dbox->x + dbox->width) * nr_samples;
693 dy2 = dbox->y + dbox->height;
694
695 static const enum a6xx_rotation rotates[2][2] = {
696 {ROTATE_0, ROTATE_HFLIP},
697 {ROTATE_VFLIP, ROTATE_180},
698 };
699 bool mirror_x = (sx2 < sx1) != (dx2 < dx1);
700 bool mirror_y = (sy2 < sy1) != (dy2 < dy1);
701
702 enum a6xx_rotation rotate = rotates[mirror_y][mirror_x];
703
704 OUT_REG(ring,
705 A6XX_GRAS_2D_SRC_TL_X(MIN2(sx1, sx2)),
706 A6XX_GRAS_2D_SRC_BR_X(MAX2(sx1, sx2) - 1),
707 A6XX_GRAS_2D_SRC_TL_Y(MIN2(sy1, sy2)),
708 A6XX_GRAS_2D_SRC_BR_Y(MAX2(sy1, sy2) - 1),
709 );
710
711 OUT_REG(ring,
712 A6XX_GRAS_2D_DST_TL(.x = MIN2(dx1, dx2),
713 .y = MIN2(dy1, dy2)),
714 A6XX_GRAS_2D_DST_BR(.x = MAX2(dx1, dx2) - 1,
715 .y = MAX2(dy1, dy2) - 1),
716 );
717
718 if (info->scissor_enable) {
719 OUT_PKT4(ring, REG_A6XX_GRAS_2D_RESOLVE_CNTL_1, 2);
720 OUT_RING(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1_X(info->scissor.minx) |
721 A6XX_GRAS_2D_RESOLVE_CNTL_1_Y(info->scissor.miny));
722 OUT_RING(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1_X(info->scissor.maxx - 1) |
723 A6XX_GRAS_2D_RESOLVE_CNTL_1_Y(info->scissor.maxy - 1));
724 }
725
726 emit_blit_setup<CHIP>(ring, info->dst.format, info->scissor_enable, NULL, 0, rotate);
727
728 for (unsigned i = 0; i < info->dst.box.depth; i++) {
729
730 emit_blit_src<CHIP>(ring, info, sbox->z + i, nr_samples);
731 emit_blit_dst(ring, info->dst.resource, info->dst.format, info->dst.level,
732 dbox->z + i);
733
734 emit_blit_fini<CHIP>(ctx, ring);
735 }
736 }
737
738 static void
emit_clear_color(struct fd_ringbuffer * ring,enum pipe_format pfmt,union pipe_color_union * color)739 emit_clear_color(struct fd_ringbuffer *ring, enum pipe_format pfmt,
740 union pipe_color_union *color)
741 {
742 switch (pfmt) {
743 case PIPE_FORMAT_Z24X8_UNORM:
744 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
745 case PIPE_FORMAT_X24S8_UINT: {
746 uint32_t depth_unorm24 = color->f[0] * ((1u << 24) - 1);
747 uint8_t stencil = color->ui[1];
748 color->ui[0] = depth_unorm24 & 0xff;
749 color->ui[1] = (depth_unorm24 >> 8) & 0xff;
750 color->ui[2] = (depth_unorm24 >> 16) & 0xff;
751 color->ui[3] = stencil;
752 break;
753 }
754 default:
755 break;
756 }
757
758 OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
759 switch (fd6_ifmt(fd6_color_format(pfmt, TILE6_LINEAR))) {
760 case R2D_UNORM8:
761 case R2D_UNORM8_SRGB:
762 /* The r2d ifmt is badly named, it also covers the signed case: */
763 if (util_format_is_snorm(pfmt)) {
764 OUT_RING(ring, float_to_byte_tex(color->f[0]));
765 OUT_RING(ring, float_to_byte_tex(color->f[1]));
766 OUT_RING(ring, float_to_byte_tex(color->f[2]));
767 OUT_RING(ring, float_to_byte_tex(color->f[3]));
768 } else {
769 OUT_RING(ring, float_to_ubyte(color->f[0]));
770 OUT_RING(ring, float_to_ubyte(color->f[1]));
771 OUT_RING(ring, float_to_ubyte(color->f[2]));
772 OUT_RING(ring, float_to_ubyte(color->f[3]));
773 }
774 break;
775 case R2D_FLOAT16:
776 OUT_RING(ring, _mesa_float_to_half(color->f[0]));
777 OUT_RING(ring, _mesa_float_to_half(color->f[1]));
778 OUT_RING(ring, _mesa_float_to_half(color->f[2]));
779 OUT_RING(ring, _mesa_float_to_half(color->f[3]));
780 break;
781 case R2D_FLOAT32:
782 case R2D_INT32:
783 case R2D_INT16:
784 case R2D_INT8:
785 default:
786 OUT_RING(ring, color->ui[0]);
787 OUT_RING(ring, color->ui[1]);
788 OUT_RING(ring, color->ui[2]);
789 OUT_RING(ring, color->ui[3]);
790 break;
791 }
792 }
793
794
795 template <chip CHIP>
796 void
fd6_clear_lrz(struct fd_batch * batch,struct fd_resource * zsbuf,struct fd_bo * lrz,double depth)797 fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf,
798 struct fd_bo *lrz, double depth)
799 {
800 struct fd_ringbuffer *ring = fd_batch_get_prologue(batch);
801
802 if (DEBUG_BLIT) {
803 fprintf(stderr, "lrz clear:\ndst resource: ");
804 util_dump_resource(stderr, &zsbuf->b.b);
805 fprintf(stderr, "\n");
806 }
807
808 OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
809 OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
810 OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(zsbuf->lrz_width - 1) |
811 A6XX_GRAS_2D_DST_BR_Y(zsbuf->lrz_height - 1));
812
813 union pipe_color_union clear_color = { .f = {depth} };
814
815 emit_clear_color(ring, PIPE_FORMAT_Z16_UNORM, &clear_color);
816 emit_blit_setup<CHIP>(ring, PIPE_FORMAT_Z16_UNORM, false, &clear_color, 0, ROTATE_0);
817
818 OUT_REG(ring,
819 A6XX_RB_2D_DST_INFO(
820 .color_format = FMT6_16_UNORM,
821 .tile_mode = TILE6_LINEAR,
822 .color_swap = WZYX,
823 ),
824 A6XX_RB_2D_DST(
825 .bo = lrz,
826 ),
827 A6XX_RB_2D_DST_PITCH(zsbuf->lrz_pitch * 2),
828 );
829
830 /*
831 * Blit command:
832 */
833
834 OUT_PKT7(ring, CP_BLIT, 1);
835 OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
836 }
837 FD_GENX(fd6_clear_lrz);
838
839 /**
840 * Handle conversion of clear color
841 */
842 static union pipe_color_union
convert_color(enum pipe_format format,union pipe_color_union * pcolor)843 convert_color(enum pipe_format format, union pipe_color_union *pcolor)
844 {
845 const struct util_format_description *desc = util_format_description(format);
846 union pipe_color_union color = *pcolor;
847
848 for (unsigned i = 0; i < 4; i++) {
849 unsigned channel = desc->swizzle[i];
850
851 if (desc->channel[channel].normalized)
852 continue;
853
854 switch (desc->channel[channel].type) {
855 case UTIL_FORMAT_TYPE_SIGNED:
856 color.i[i] = MAX2(color.i[i], -(1<<(desc->channel[channel].size - 1)));
857 color.i[i] = MIN2(color.i[i], (1 << (desc->channel[channel].size - 1)) - 1);
858 break;
859 case UTIL_FORMAT_TYPE_UNSIGNED:
860 color.ui[i] = MIN2(color.ui[i], BITFIELD_MASK(desc->channel[channel].size));
861 break;
862 }
863 }
864
865 /* For solid-fill blits, the hw isn't going to convert from
866 * linear to srgb for us:
867 */
868 if (util_format_is_srgb(format)) {
869 for (int i = 0; i < 3; i++)
870 color.f[i] = util_format_linear_to_srgb_float(color.f[i]);
871 }
872
873 if (util_format_is_snorm(format)) {
874 for (int i = 0; i < 3; i++)
875 color.f[i] = CLAMP(color.f[i], -1.0f, 1.0f);
876 }
877
878 return color;
879 }
880
881 template <chip CHIP>
882 static void
fd6_clear_buffer(struct pipe_context * pctx,struct pipe_resource * prsc,unsigned offset,unsigned size,const void * clear_value,int clear_value_size)883 fd6_clear_buffer(struct pipe_context *pctx,
884 struct pipe_resource *prsc,
885 unsigned offset, unsigned size,
886 const void *clear_value, int clear_value_size)
887 {
888 enum pipe_format dst_fmt;
889 union pipe_color_union color;
890
891 switch (clear_value_size) {
892 case 16:
893 dst_fmt = PIPE_FORMAT_R32G32B32A32_UINT;
894 memcpy(&color.ui, clear_value, 16);
895 break;
896 case 8:
897 dst_fmt = PIPE_FORMAT_R32G32_UINT;
898 memcpy(&color.ui, clear_value, 8);
899 memset(&color.ui[2], 0, 8);
900 break;
901 case 4:
902 dst_fmt = PIPE_FORMAT_R32_UINT;
903 memcpy(&color.ui, clear_value, 4);
904 memset(&color.ui[1], 0, 12);
905 break;
906 case 2:
907 dst_fmt = PIPE_FORMAT_R16_UINT;
908 color.ui[0] = *(unsigned short *)clear_value;
909 memset(&color.ui[1], 0, 12);
910 break;
911 case 1:
912 dst_fmt = PIPE_FORMAT_R8_UINT;
913 color.ui[0] = *(unsigned char *)clear_value;
914 memset(&color.ui[1], 0, 12);
915 break;
916 default:
917 dst_fmt = PIPE_FORMAT_NONE;
918 break;
919 }
920
921 /* unsupported clear_value_size and when alignment doesn't match fallback */
922 if ((dst_fmt == PIPE_FORMAT_NONE) || (offset % clear_value_size)) {
923 u_default_clear_buffer(pctx, prsc, offset, size, clear_value, clear_value_size);
924 return;
925 }
926
927 if (DEBUG_BLIT) {
928 fprintf(stderr, "buffer clear:\ndst resource: ");
929 util_dump_resource(stderr, prsc);
930 fprintf(stderr, "\n");
931 }
932
933 struct fd_context *ctx = fd_context(pctx);
934 struct fd_resource *rsc = fd_resource(prsc);
935 struct fd_batch *batch = fd_bc_alloc_batch(ctx, true);
936 struct fd_ringbuffer *ring = batch->draw;
937
938 fd_screen_lock(ctx->screen);
939 fd_batch_resource_write(batch, rsc);
940 fd_screen_unlock(ctx->screen);
941
942 assert(!batch->flushed);
943
944 /* Marking the batch as needing flush must come after the batch
945 * dependency tracking (resource_read()/resource_write()), as that
946 * can trigger a flush
947 */
948 fd_batch_needs_flush(batch);
949
950 fd_batch_update_queries(batch);
951
952 emit_setup<CHIP>(batch);
953
954 emit_clear_color(ring, dst_fmt, &color);
955 emit_blit_setup<CHIP>(ring, dst_fmt, false, &color, 0, ROTATE_0);
956
957 unsigned dshift = (offset / clear_value_size) & 0x3f;
958 for (unsigned part_offset = 0; part_offset < size; part_offset += (0x4000 - 0x40)) {
959 unsigned doff = (offset + part_offset) & ~0x3f;
960
961 unsigned w = MIN2((size - part_offset) / clear_value_size, (0x4000 - 0x40));
962
963 emit_blit_buffer_dst(ring, rsc, doff, 0, fd6_color_format(dst_fmt, TILE6_LINEAR));
964
965 OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
966 OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(dshift) | A6XX_GRAS_2D_DST_TL_Y(0));
967 OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(dshift + w - 1) |
968 A6XX_GRAS_2D_DST_BR_Y(0));
969
970 emit_blit_fini<CHIP>(ctx, ring);
971 }
972
973 fd6_emit_flushes<CHIP>(batch->ctx, ring,
974 FD6_FLUSH_CCU_COLOR |
975 FD6_FLUSH_CCU_DEPTH |
976 FD6_FLUSH_CACHE |
977 FD6_WAIT_FOR_IDLE);
978
979 fd_batch_flush(batch);
980 fd_batch_reference(&batch, NULL);
981
982 /* Acc query state will have been dirtied by our fd_batch_update_queries, so
983 * the ctx->batch may need to turn its queries back on.
984 */
985 fd_context_dirty(ctx, FD_DIRTY_QUERY);
986 }
987
988 template <chip CHIP>
989 void
fd6_clear_surface(struct fd_context * ctx,struct fd_ringbuffer * ring,struct pipe_surface * psurf,const struct pipe_box * box2d,union pipe_color_union * color,uint32_t unknown_8c01)990 fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring,
991 struct pipe_surface *psurf, const struct pipe_box *box2d,
992 union pipe_color_union *color, uint32_t unknown_8c01)
993 {
994 if (DEBUG_BLIT) {
995 fprintf(stderr, "surface clear:\ndst resource: ");
996 util_dump_resource(stderr, psurf->texture);
997 fprintf(stderr, "\n");
998 }
999
1000 uint32_t nr_samples = fd_resource_nr_samples(psurf->texture);
1001 OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
1002 OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(box2d->x * nr_samples) |
1003 A6XX_GRAS_2D_DST_TL_Y(box2d->y));
1004 OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X((box2d->x + box2d->width) * nr_samples - 1) |
1005 A6XX_GRAS_2D_DST_BR_Y(box2d->y + box2d->height - 1));
1006
1007 union pipe_color_union clear_color = convert_color(psurf->format, color);
1008
1009 emit_clear_color(ring, psurf->format, &clear_color);
1010 emit_blit_setup<CHIP>(ring, psurf->format, false, &clear_color, unknown_8c01, ROTATE_0);
1011
1012 for (unsigned i = psurf->u.tex.first_layer; i <= psurf->u.tex.last_layer;
1013 i++) {
1014 emit_blit_dst(ring, psurf->texture, psurf->format, psurf->u.tex.level, i);
1015
1016 emit_blit_fini<CHIP>(ctx, ring);
1017 }
1018 }
1019 FD_GENX(fd6_clear_surface);
1020
1021 template <chip CHIP>
1022 static void
fd6_clear_texture(struct pipe_context * pctx,struct pipe_resource * prsc,unsigned level,const struct pipe_box * box,const void * data)1023 fd6_clear_texture(struct pipe_context *pctx, struct pipe_resource *prsc,
1024 unsigned level, const struct pipe_box *box, const void *data)
1025 assert_dt
1026 {
1027 struct fd_context *ctx = fd_context(pctx);
1028 struct fd_resource *rsc = fd_resource(prsc);
1029
1030 if (DEBUG_BLIT) {
1031 fprintf(stderr, "surface texture:\ndst resource: ");
1032 util_dump_resource(stderr, prsc);
1033 fprintf(stderr, "\n");
1034 }
1035
1036 if (!can_do_clear(prsc, level, box)) {
1037 u_default_clear_texture(pctx, prsc, level, box, data);
1038 return;
1039 }
1040
1041 union pipe_color_union color;
1042
1043 if (util_format_is_depth_or_stencil(prsc->format)) {
1044 const struct util_format_description *desc =
1045 util_format_description(prsc->format);
1046 float depth = 0.0f;
1047 uint8_t stencil = 0;
1048
1049 if (util_format_has_depth(desc))
1050 util_format_unpack_z_float(prsc->format, &depth, data, 1);
1051
1052 if (util_format_has_stencil(desc))
1053 util_format_unpack_s_8uint(prsc->format, &stencil, data, 1);
1054
1055 if (rsc->stencil)
1056 fd6_clear_texture<CHIP>(pctx, &rsc->stencil->b.b, level, box, &stencil);
1057
1058 color.f[0] = depth;
1059 color.ui[1] = stencil;
1060 } else {
1061 util_format_unpack_rgba(prsc->format, color.ui, data, 1);
1062 }
1063
1064 struct fd_batch *batch = fd_bc_alloc_batch(ctx, true);
1065
1066 fd_screen_lock(ctx->screen);
1067 fd_batch_resource_write(batch, rsc);
1068 fd_screen_unlock(ctx->screen);
1069
1070 assert(!batch->flushed);
1071
1072 /* Marking the batch as needing flush must come after the batch
1073 * dependency tracking (resource_read()/resource_write()), as that
1074 * can trigger a flush
1075 */
1076 fd_batch_needs_flush(batch);
1077
1078 fd_batch_update_queries(batch);
1079
1080 emit_setup<CHIP>(batch);
1081
1082 struct pipe_surface surf = {
1083 .format = prsc->format,
1084 .texture = prsc,
1085 .u = {
1086 .tex = {
1087 .level = level,
1088 .first_layer = box->z,
1089 .last_layer = box->depth + box->z - 1,
1090 },
1091 },
1092 };
1093
1094 fd6_clear_surface<CHIP>(ctx, batch->draw, &surf, box, &color, 0);
1095
1096 fd6_emit_flushes<CHIP>(batch->ctx, batch->draw,
1097 FD6_FLUSH_CCU_COLOR |
1098 FD6_FLUSH_CCU_DEPTH |
1099 FD6_FLUSH_CACHE |
1100 FD6_WAIT_FOR_IDLE);
1101
1102 fd_batch_flush(batch);
1103 fd_batch_reference(&batch, NULL);
1104
1105 /* Acc query state will have been dirtied by our fd_batch_update_queries, so
1106 * the ctx->batch may need to turn its queries back on.
1107 */
1108 fd_context_dirty(ctx, FD_DIRTY_QUERY);
1109 }
1110
1111 template <chip CHIP>
1112 void
fd6_resolve_tile(struct fd_batch * batch,struct fd_ringbuffer * ring,uint32_t base,struct pipe_surface * psurf,uint32_t unknown_8c01)1113 fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring,
1114 uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01)
1115 {
1116 const struct fd_gmem_stateobj *gmem = batch->gmem_state;
1117 uint64_t gmem_base = batch->ctx->screen->gmem_base + base;
1118 uint32_t gmem_pitch = gmem->bin_w * batch->framebuffer.samples *
1119 util_format_get_blocksize(psurf->format);
1120
1121 OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
1122 OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
1123 OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(psurf->width - 1) |
1124 A6XX_GRAS_2D_DST_BR_Y(psurf->height - 1));
1125
1126 OUT_REG(ring,
1127 A6XX_GRAS_2D_SRC_TL_X(0),
1128 A6XX_GRAS_2D_SRC_BR_X(psurf->width - 1),
1129 A6XX_GRAS_2D_SRC_TL_Y(0),
1130 A6XX_GRAS_2D_SRC_BR_Y(psurf->height - 1),
1131 );
1132
1133 /* Enable scissor bit, which will take into account the window scissor
1134 * which is set per-tile
1135 */
1136 emit_blit_setup<CHIP>(ring, psurf->format, true, NULL, unknown_8c01, ROTATE_0);
1137
1138 /* We shouldn't be using GMEM in the layered rendering case: */
1139 assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
1140
1141 emit_blit_dst(ring, psurf->texture, psurf->format, psurf->u.tex.level,
1142 psurf->u.tex.first_layer);
1143
1144 enum a6xx_format sfmt = fd6_color_format(psurf->format, TILE6_LINEAR);
1145 enum a3xx_msaa_samples samples = fd_msaa_samples(batch->framebuffer.samples);
1146
1147 OUT_REG(ring,
1148 SP_PS_2D_SRC_INFO(
1149 CHIP,
1150 .color_format = sfmt,
1151 .tile_mode = TILE6_2,
1152 .color_swap = WZYX,
1153 .srgb = util_format_is_srgb(psurf->format),
1154 .samples = samples,
1155 .samples_average = samples > MSAA_ONE,
1156 .unk20 = true,
1157 .unk22 = true,
1158 ),
1159 SP_PS_2D_SRC_SIZE(
1160 CHIP,
1161 .width = psurf->width,
1162 .height = psurf->height,
1163 ),
1164 SP_PS_2D_SRC(
1165 CHIP,
1166 .qword = gmem_base,
1167 ),
1168 SP_PS_2D_SRC_PITCH(
1169 CHIP,
1170 .pitch = gmem_pitch,
1171 ),
1172 );
1173
1174 /* sync GMEM writes with CACHE. */
1175 fd6_cache_inv<CHIP>(batch->ctx, ring);
1176
1177 /* Wait for CACHE_INVALIDATE to land */
1178 OUT_WFI5(ring);
1179
1180 OUT_PKT7(ring, CP_BLIT, 1);
1181 OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
1182
1183 OUT_WFI5(ring);
1184
1185 /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
1186 * sysmem, and we generally assume that GMEM renderpasses leave their
1187 * results in sysmem, so we need to flush manually here.
1188 */
1189 fd6_emit_flushes<CHIP>(batch->ctx, ring,
1190 FD6_FLUSH_CCU_COLOR | FD6_WAIT_FOR_IDLE);
1191 }
1192 FD_GENX(fd6_resolve_tile);
1193
1194 template <chip CHIP>
1195 static bool
handle_rgba_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1196 handle_rgba_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
1197 assert_dt
1198 {
1199 struct fd_batch *batch;
1200
1201 assert(!(info->mask & PIPE_MASK_ZS));
1202
1203 if (!can_do_blit(info))
1204 return false;
1205
1206 struct fd_resource *src = fd_resource(info->src.resource);
1207 struct fd_resource *dst = fd_resource(info->dst.resource);
1208
1209 fd6_validate_format(ctx, src, info->src.format);
1210 fd6_validate_format(ctx, dst, info->dst.format);
1211
1212 batch = fd_bc_alloc_batch(ctx, true);
1213
1214 fd_screen_lock(ctx->screen);
1215
1216 fd_batch_resource_read(batch, src);
1217 fd_batch_resource_write(batch, dst);
1218
1219 fd_screen_unlock(ctx->screen);
1220
1221 assert(!batch->flushed);
1222
1223 /* Marking the batch as needing flush must come after the batch
1224 * dependency tracking (resource_read()/resource_write()), as that
1225 * can trigger a flush
1226 */
1227 fd_batch_needs_flush(batch);
1228
1229 fd_batch_update_queries(batch);
1230
1231 emit_setup<CHIP>(batch);
1232
1233 DBG_BLIT(info, batch);
1234
1235 trace_start_blit(&batch->trace, batch->draw, info->src.resource->target,
1236 info->dst.resource->target);
1237
1238 if ((info->src.resource->target == PIPE_BUFFER) &&
1239 (info->dst.resource->target == PIPE_BUFFER)) {
1240 assert(src->layout.tile_mode == TILE6_LINEAR);
1241 assert(dst->layout.tile_mode == TILE6_LINEAR);
1242 emit_blit_buffer<CHIP>(ctx, batch->draw, info);
1243 } else {
1244 /* I don't *think* we need to handle blits between buffer <-> !buffer */
1245 assert(info->src.resource->target != PIPE_BUFFER);
1246 assert(info->dst.resource->target != PIPE_BUFFER);
1247 emit_blit_texture<CHIP>(ctx, batch->draw, info);
1248 }
1249
1250 trace_end_blit(&batch->trace, batch->draw);
1251
1252 fd6_emit_flushes<CHIP>(batch->ctx, batch->draw,
1253 FD6_FLUSH_CCU_COLOR |
1254 FD6_FLUSH_CCU_DEPTH |
1255 FD6_FLUSH_CACHE |
1256 FD6_WAIT_FOR_IDLE);
1257
1258 fd_batch_flush(batch);
1259 fd_batch_reference(&batch, NULL);
1260
1261 /* Acc query state will have been dirtied by our fd_batch_update_queries, so
1262 * the ctx->batch may need to turn its queries back on.
1263 */
1264 fd_context_dirty(ctx, FD_DIRTY_QUERY);
1265
1266 return true;
1267 }
1268
1269 /**
1270 * Re-written z/s blits can still fail for various reasons (for example MSAA).
1271 * But we want to do the fallback blit with the re-written pipe_blit_info,
1272 * in particular as u_blitter cannot blit stencil. So handle the fallback
1273 * ourself and never "fail".
1274 */
1275 template <chip CHIP>
1276 static bool
do_rewritten_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1277 do_rewritten_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
1278 assert_dt
1279 {
1280 bool success = handle_rgba_blit<CHIP>(ctx, info);
1281 if (!success) {
1282 success = fd_blitter_blit(ctx, info);
1283 }
1284 assert(success); /* fallback should never fail! */
1285 return success;
1286 }
1287
1288 /**
1289 * Handle depth/stencil blits either via u_blitter and/or re-writing the
1290 * blit into an equivilant format that we can handle
1291 */
1292 template <chip CHIP>
1293 static bool
handle_zs_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1294 handle_zs_blit(struct fd_context *ctx,
1295 const struct pipe_blit_info *info) assert_dt
1296 {
1297 struct pipe_blit_info blit = *info;
1298
1299 if (DEBUG_BLIT) {
1300 fprintf(stderr, "---- handle_zs_blit: ");
1301 dump_blit_info(info);
1302 }
1303
1304 fail_if(info->src.format != info->dst.format);
1305
1306 struct fd_resource *src = fd_resource(info->src.resource);
1307 struct fd_resource *dst = fd_resource(info->dst.resource);
1308
1309 switch (info->dst.format) {
1310 case PIPE_FORMAT_S8_UINT:
1311 assert(info->mask == PIPE_MASK_S);
1312 blit.mask = PIPE_MASK_R;
1313 blit.src.format = PIPE_FORMAT_R8_UINT;
1314 blit.dst.format = PIPE_FORMAT_R8_UINT;
1315 blit.sample0_only = true;
1316 return do_rewritten_blit<CHIP>(ctx, &blit);
1317
1318 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1319 if (info->mask & PIPE_MASK_Z) {
1320 blit.mask = PIPE_MASK_R;
1321 blit.src.format = PIPE_FORMAT_R32_FLOAT;
1322 blit.dst.format = PIPE_FORMAT_R32_FLOAT;
1323 blit.sample0_only = true;
1324 do_rewritten_blit<CHIP>(ctx, &blit);
1325 }
1326
1327 if (info->mask & PIPE_MASK_S) {
1328 blit.mask = PIPE_MASK_R;
1329 blit.src.format = PIPE_FORMAT_R8_UINT;
1330 blit.dst.format = PIPE_FORMAT_R8_UINT;
1331 blit.src.resource = &src->stencil->b.b;
1332 blit.dst.resource = &dst->stencil->b.b;
1333 blit.sample0_only = true;
1334 do_rewritten_blit<CHIP>(ctx, &blit);
1335 }
1336
1337 return true;
1338
1339 case PIPE_FORMAT_Z16_UNORM:
1340 blit.mask = PIPE_MASK_R;
1341 blit.src.format = PIPE_FORMAT_R16_UNORM;
1342 blit.dst.format = PIPE_FORMAT_R16_UNORM;
1343 blit.sample0_only = true;
1344 return do_rewritten_blit<CHIP>(ctx, &blit);
1345
1346 case PIPE_FORMAT_Z32_UNORM:
1347 case PIPE_FORMAT_Z32_FLOAT:
1348 assert(info->mask == PIPE_MASK_Z);
1349 blit.mask = PIPE_MASK_R;
1350 blit.src.format = PIPE_FORMAT_R32_UINT;
1351 blit.dst.format = PIPE_FORMAT_R32_UINT;
1352 blit.sample0_only = true;
1353 return do_rewritten_blit<CHIP>(ctx, &blit);
1354
1355 case PIPE_FORMAT_Z24X8_UNORM:
1356 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1357 blit.mask = 0;
1358 if (info->mask & PIPE_MASK_Z)
1359 blit.mask |= PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B;
1360 if (info->mask & PIPE_MASK_S)
1361 blit.mask |= PIPE_MASK_A;
1362 blit.src.format = PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
1363 blit.dst.format = PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
1364 /* non-UBWC Z24_UNORM_S8_UINT_AS_R8G8B8A8 is broken on a630, fall back to
1365 * 8888_unorm.
1366 */
1367 if (!ctx->screen->info->a6xx.has_z24uint_s8uint) {
1368 if (!src->layout.ubwc && !dst->layout.ubwc) {
1369 blit.src.format = PIPE_FORMAT_RGBA8888_UINT;
1370 blit.dst.format = PIPE_FORMAT_RGBA8888_UINT;
1371 } else {
1372 if (!src->layout.ubwc)
1373 blit.src.format = PIPE_FORMAT_RGBA8888_UNORM;
1374 if (!dst->layout.ubwc)
1375 blit.dst.format = PIPE_FORMAT_RGBA8888_UNORM;
1376 }
1377 }
1378 if (info->src.resource->nr_samples > 1 && blit.src.format != PIPE_FORMAT_RGBA8888_UINT)
1379 blit.sample0_only = true;
1380 return fd_blitter_blit(ctx, &blit);
1381
1382 default:
1383 return false;
1384 }
1385 }
1386
1387 template <chip CHIP>
1388 static bool
handle_compressed_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1389 handle_compressed_blit(struct fd_context *ctx,
1390 const struct pipe_blit_info *info) assert_dt
1391 {
1392 struct pipe_blit_info blit = *info;
1393
1394 if (DEBUG_BLIT) {
1395 fprintf(stderr, "---- handle_compressed_blit: ");
1396 dump_blit_info(info);
1397 }
1398
1399 if (info->src.format != info->dst.format)
1400 return fd_blitter_blit(ctx, info);
1401
1402 if (util_format_get_blocksize(info->src.format) == 8) {
1403 blit.src.format = blit.dst.format = PIPE_FORMAT_R16G16B16A16_UINT;
1404 } else {
1405 assert(util_format_get_blocksize(info->src.format) == 16);
1406 blit.src.format = blit.dst.format = PIPE_FORMAT_R32G32B32A32_UINT;
1407 }
1408
1409 int bw = util_format_get_blockwidth(info->src.format);
1410 int bh = util_format_get_blockheight(info->src.format);
1411
1412 /* NOTE: x/y *must* be aligned to block boundary (ie. in
1413 * glCompressedTexSubImage2D()) but width/height may not
1414 * be:
1415 */
1416
1417 assert((blit.src.box.x % bw) == 0);
1418 assert((blit.src.box.y % bh) == 0);
1419
1420 blit.src.box.x /= bw;
1421 blit.src.box.y /= bh;
1422 blit.src.box.width = DIV_ROUND_UP(blit.src.box.width, bw);
1423 blit.src.box.height = DIV_ROUND_UP(blit.src.box.height, bh);
1424
1425 assert((blit.dst.box.x % bw) == 0);
1426 assert((blit.dst.box.y % bh) == 0);
1427
1428 blit.dst.box.x /= bw;
1429 blit.dst.box.y /= bh;
1430 blit.dst.box.width = DIV_ROUND_UP(blit.dst.box.width, bw);
1431 blit.dst.box.height = DIV_ROUND_UP(blit.dst.box.height, bh);
1432
1433 return do_rewritten_blit<CHIP>(ctx, &blit);
1434 }
1435
1436 /**
1437 * For SNORM formats, copy them as the equivalent UNORM format. If we treat
1438 * them as snorm then the 0x80 (-1.0 snorm8) value will get clamped to 0x81
1439 * (also -1.0), when we're supposed to be memcpying the bits. See
1440 * https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/2917 for discussion.
1441 */
1442 template <chip CHIP>
1443 static bool
handle_snorm_copy_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1444 handle_snorm_copy_blit(struct fd_context *ctx,
1445 const struct pipe_blit_info *info)
1446 assert_dt
1447 {
1448 /* If we're interpolating the pixels, we can't just treat the values as unorm. */
1449 fail_if(info->filter == PIPE_TEX_FILTER_LINEAR);
1450
1451 struct pipe_blit_info blit = *info;
1452
1453 blit.src.format = blit.dst.format = util_format_snorm_to_unorm(info->src.format);
1454
1455 return do_rewritten_blit<CHIP>(ctx, &blit);
1456 }
1457
1458 template <chip CHIP>
1459 static bool
fd6_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1460 fd6_blit(struct fd_context *ctx, const struct pipe_blit_info *info) assert_dt
1461 {
1462 if (info->mask & PIPE_MASK_ZS)
1463 return handle_zs_blit<CHIP>(ctx, info);
1464
1465 if (util_format_is_compressed(info->src.format) ||
1466 util_format_is_compressed(info->dst.format))
1467 return handle_compressed_blit<CHIP>(ctx, info);
1468
1469 if ((info->src.format == info->dst.format) &&
1470 util_format_is_snorm(info->src.format))
1471 return handle_snorm_copy_blit<CHIP>(ctx, info);
1472
1473 return handle_rgba_blit<CHIP>(ctx, info);
1474 }
1475
1476 template <chip CHIP>
1477 void
fd6_blitter_init(struct pipe_context * pctx)1478 fd6_blitter_init(struct pipe_context *pctx)
1479 disable_thread_safety_analysis
1480 {
1481 struct fd_context *ctx = fd_context(pctx);
1482
1483 ctx->clear_ubwc = fd6_clear_ubwc<CHIP>;
1484 ctx->validate_format = fd6_validate_format;
1485
1486 if (FD_DBG(NOBLIT))
1487 return;
1488
1489 pctx->clear_buffer = fd6_clear_buffer<CHIP>;
1490 pctx->clear_texture = fd6_clear_texture<CHIP>;
1491 ctx->blit = fd6_blit<CHIP>;
1492 }
1493 FD_GENX(fd6_blitter_init);
1494
1495 unsigned
fd6_tile_mode_for_format(enum pipe_format pfmt)1496 fd6_tile_mode_for_format(enum pipe_format pfmt)
1497 {
1498 /* basically just has to be a format we can blit, so uploads/downloads
1499 * via linear staging buffer works:
1500 */
1501 if (ok_format(pfmt))
1502 return TILE6_3;
1503
1504 return TILE6_LINEAR;
1505 }
1506 unsigned
fd6_tile_mode(const struct pipe_resource * tmpl)1507 fd6_tile_mode(const struct pipe_resource *tmpl)
1508 {
1509 /* if the mipmap level 0 is still too small to be tiled, then don't
1510 * bother pretending:
1511 */
1512 if ((tmpl->width0 < FDL_MIN_UBWC_WIDTH) &&
1513 !util_format_is_depth_or_stencil(tmpl->format))
1514 return TILE6_LINEAR;
1515
1516 return fd6_tile_mode_for_format(tmpl->format);
1517 }
1518