1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 * Copyright 2015 Advanced Micro Devices, Inc.
4 *
5 * SPDX-License-Identifier: MIT
6 */
7
8 #include "si_pipe.h"
9 #include "util/format/u_format.h"
10 #include "util/u_log.h"
11 #include "util/u_surface.h"
12 #include "util/hash_table.h"
13 #include "ac_nir_meta.h"
14
/* Per-operation u_blitter save masks: which context state each class of
 * internal blit must save/restore. SI_DISABLE_RENDER_COND additionally
 * makes the operation ignore the current render condition.
 */
enum
{
   SI_COPY =
      SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES | SI_SAVE_FRAGMENT_STATE | SI_DISABLE_RENDER_COND,

   SI_BLIT = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES | SI_SAVE_FRAGMENT_STATE,

   SI_DECOMPRESS = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE | SI_DISABLE_RENDER_COND,

   SI_COLOR_RESOLVE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE
};
26
/* Save all context state that u_blitter may clobber (selected by "op") and
 * put the context into a state where u_blitter can render.
 * Must be paired with si_blitter_end().
 */
void si_blitter_begin(struct si_context *sctx, enum si_blitter_op op)
{
   /* Geometry-stage shaders, streamout and the rasterizer are saved for
    * every operation class. */
   util_blitter_save_vertex_shader(sctx->blitter, sctx->shader.vs.cso);
   util_blitter_save_tessctrl_shader(sctx->blitter, sctx->shader.tcs.cso);
   util_blitter_save_tesseval_shader(sctx->blitter, sctx->shader.tes.cso);
   util_blitter_save_geometry_shader(sctx->blitter, sctx->shader.gs.cso);
   util_blitter_save_so_targets(sctx->blitter, sctx->streamout.num_targets,
                                (struct pipe_stream_output_target **)sctx->streamout.targets,
                                sctx->streamout.output_prim);
   util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);

   if (op & SI_SAVE_FRAGMENT_STATE) {
      struct pipe_constant_buffer fs_cb = {};
      si_get_pipe_constant_buffer(sctx, PIPE_SHADER_FRAGMENT, 0, &fs_cb);

      if (op & SI_SAVE_FRAGMENT_CONSTANT)
         util_blitter_save_fragment_constant_buffer_slot(sctx->blitter, &fs_cb);

      /* Release the buffer reference held by the local fs_cb copy. */
      pipe_resource_reference(&fs_cb.buffer, NULL);
      util_blitter_save_blend(sctx->blitter, sctx->queued.named.blend);
      util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa);
      util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state);
      util_blitter_save_fragment_shader(sctx->blitter, sctx->shader.ps.cso);
      util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask, sctx->ps_iter_samples);
      util_blitter_save_scissor(sctx->blitter, &sctx->scissors[0]);
      util_blitter_save_window_rectangles(sctx->blitter, sctx->window_rectangles_include,
                                          sctx->num_window_rectangles, sctx->window_rectangles);
   }

   if (op & SI_SAVE_FRAMEBUFFER)
      util_blitter_save_framebuffer(sctx->blitter, &sctx->framebuffer.state);

   if (op & SI_SAVE_TEXTURES) {
      /* Only the first 2 fragment sampler slots are saved. */
      util_blitter_save_fragment_sampler_states(
         sctx->blitter, 2, (void **)sctx->samplers[PIPE_SHADER_FRAGMENT].sampler_states);

      util_blitter_save_fragment_sampler_views(sctx->blitter, 2,
                                               sctx->samplers[PIPE_SHADER_FRAGMENT].views);
   }

   if (op & SI_DISABLE_RENDER_COND)
      sctx->render_cond_enabled = false;

   /* Force primitive binning off for the duration of the blit; restored in
    * si_blitter_end(). */
   if (sctx->screen->dpbb_allowed) {
      sctx->dpbb_force_off = true;
      si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);
   }

   /* Force-disable fbfetch because there are unsolvable recursion problems with u_blitter. */
   si_force_disable_ps_colorbuf0_slot(sctx);

   sctx->blitter_running = true;
}
80
/* Restore driver bookkeeping after a u_blitter operation started by
 * si_blitter_begin(). u_blitter itself restores the CSOs it saved; this
 * function re-dirties the driver-side state that the blit bypassed.
 */
void si_blitter_end(struct si_context *sctx)
{
   sctx->blitter_running = false;

   /* Undo the dpbb_force_off set in si_blitter_begin(). */
   if (sctx->screen->dpbb_allowed) {
      sctx->dpbb_force_off = false;
      si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);
   }

   /* Re-apply the render condition that si_blitter_begin() may have disabled. */
   sctx->render_cond_enabled = sctx->render_cond;

   /* Restore shader pointers because the VS blit shader changed all
    * non-global VS user SGPRs. */
   sctx->shader_pointers_dirty |= SI_DESCS_SHADER_MASK(VERTEX);

   if (sctx->gfx_level >= GFX11)
      sctx->gs_attribute_ring_pointer_dirty = true;

   /* Reset SI_SGPR_SMALL_PRIM_CULL_INFO: */
   if (sctx->screen->use_ngg_culling)
      si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state);

   sctx->vertex_buffers_dirty = sctx->num_vertex_elements > 0;
   si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers);

   /* We force-disabled fbfetch for u_blitter, so recompute the state. */
   si_update_ps_colorbuf0_slot(sctx);
}
109
u_max_sample(struct pipe_resource * r)110 static unsigned u_max_sample(struct pipe_resource *r)
111 {
112 return r->nr_samples ? r->nr_samples - 1 : 0;
113 }
114
/* Copy the requested Z and/or S planes of "src" into "dst" using the DB->CB
 * hardware copy path: one custom depth-stencil draw per (level, layer, sample).
 *
 * \param planes       PIPE_MASK_Z / PIPE_MASK_S combination to copy
 * \param level_mask   bitmask of mip levels to process
 * \return bitmask of levels that were copied for all of their layers and
 *         samples, i.e. levels whose dirty bits the caller may clear.
 *
 * Not used on Gfx11+ (see the assert).
 */
static unsigned si_blit_dbcb_copy(struct si_context *sctx, struct si_texture *src,
                                  struct si_texture *dst, unsigned planes, unsigned level_mask,
                                  unsigned first_layer, unsigned last_layer, unsigned first_sample,
                                  unsigned last_sample)
{
   struct pipe_surface surf_tmpl = {{0}};
   unsigned layer, sample, checked_last_layer, max_layer;
   unsigned fully_copied_levels = 0;

   assert(sctx->gfx_level < GFX11);

   /* Program the DB->CB copy mode for the requested planes. */
   if (planes & PIPE_MASK_Z)
      sctx->dbcb_depth_copy_enabled = true;
   if (planes & PIPE_MASK_S)
      sctx->dbcb_stencil_copy_enabled = true;
   si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);

   assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled);

   sctx->decompression_enabled = true;

   while (level_mask) {
      unsigned level = u_bit_scan(&level_mask);

      /* The smaller the mipmap level, the less layers there are
       * as far as 3D textures are concerned. */
      max_layer = util_max_layer(&src->buffer.b.b, level);
      checked_last_layer = MIN2(last_layer, max_layer);

      surf_tmpl.u.tex.level = level;

      for (layer = first_layer; layer <= checked_last_layer; layer++) {
         struct pipe_surface *zsurf, *cbsurf;

         surf_tmpl.format = src->buffer.b.b.format;
         surf_tmpl.u.tex.first_layer = layer;
         surf_tmpl.u.tex.last_layer = layer;

         zsurf = sctx->b.create_surface(&sctx->b, &src->buffer.b.b, &surf_tmpl);

         surf_tmpl.format = dst->buffer.b.b.format;
         cbsurf = sctx->b.create_surface(&sctx->b, &dst->buffer.b.b, &surf_tmpl);

         for (sample = first_sample; sample <= last_sample; sample++) {
            /* Only re-emit DB state when the copied sample changes. */
            if (sample != sctx->dbcb_copy_sample) {
               sctx->dbcb_copy_sample = sample;
               si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
            }

            si_blitter_begin(sctx, SI_DECOMPRESS);
            util_blitter_custom_depth_stencil(sctx->blitter, zsurf, cbsurf, 1 << sample,
                                              sctx->custom_dsa_flush, 1.0f);
            si_blitter_end(sctx);
         }

         pipe_surface_reference(&zsurf, NULL);
         pipe_surface_reference(&cbsurf, NULL);
      }

      /* A level counts as fully copied only if every layer and sample was covered. */
      if (first_layer == 0 && last_layer >= max_layer && first_sample == 0 &&
          last_sample >= u_max_sample(&src->buffer.b.b))
         fully_copied_levels |= 1u << level;
   }

   /* Restore normal DB state. */
   sctx->decompression_enabled = false;
   sctx->dbcb_depth_copy_enabled = false;
   sctx->dbcb_stencil_copy_enabled = false;
   si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);

   return fully_copied_levels;
}
186
/* Helper function for si_blit_decompress_zs_in_place.
 *
 * In-place decompress the given Z and/or S planes for the mip levels in
 * "level_mask" by drawing with the custom flush DSA state, then clear the
 * corresponding dirty bits for levels whose full layer range was covered.
 * Not used on Gfx12 (see the assert).
 */
static void si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
                                                  struct si_texture *texture, unsigned planes,
                                                  unsigned level_mask, unsigned first_layer,
                                                  unsigned last_layer)
{
   struct pipe_surface *zsurf, surf_tmpl = {{0}};
   unsigned layer, max_layer, checked_last_layer;
   unsigned fully_decompressed_mask = 0;

   assert(sctx->gfx_level < GFX12);

   if (!level_mask)
      return;

   /* Select the in-place flush mode for the requested planes. */
   if (planes & PIPE_MASK_S)
      sctx->db_flush_stencil_inplace = true;
   if (planes & PIPE_MASK_Z)
      sctx->db_flush_depth_inplace = true;
   si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);

   surf_tmpl.format = texture->buffer.b.b.format;

   sctx->decompression_enabled = true;

   while (level_mask) {
      unsigned level = u_bit_scan(&level_mask);

      surf_tmpl.u.tex.level = level;

      /* The smaller the mipmap level, the less layers there are
       * as far as 3D textures are concerned. */
      max_layer = util_max_layer(&texture->buffer.b.b, level);
      checked_last_layer = MIN2(last_layer, max_layer);

      for (layer = first_layer; layer <= checked_last_layer; layer++) {
         surf_tmpl.u.tex.first_layer = layer;
         surf_tmpl.u.tex.last_layer = layer;

         zsurf = sctx->b.create_surface(&sctx->b, &texture->buffer.b.b, &surf_tmpl);

         si_blitter_begin(sctx, SI_DECOMPRESS);
         util_blitter_custom_depth_stencil(sctx->blitter, zsurf, NULL, ~0, sctx->custom_dsa_flush,
                                           1.0f);
         si_blitter_end(sctx);

         pipe_surface_reference(&zsurf, NULL);
      }

      /* The texture will always be dirty if some layers aren't flushed.
       * I don't think this case occurs often though. */
      if (first_layer == 0 && last_layer >= max_layer) {
         fully_decompressed_mask |= 1u << level;
      }
   }

   if (planes & PIPE_MASK_Z)
      texture->dirty_level_mask &= ~fully_decompressed_mask;
   if (planes & PIPE_MASK_S)
      texture->stencil_dirty_level_mask &= ~fully_decompressed_mask;

   /* Restore normal DB state. */
   sctx->decompression_enabled = false;
   sctx->db_flush_depth_inplace = false;
   sctx->db_flush_stencil_inplace = false;
   si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
}
254
255 /* Helper function of si_flush_depth_texture: decompress the given levels
256 * of Z and/or S planes in place.
257 */
si_blit_decompress_zs_in_place(struct si_context * sctx,struct si_texture * texture,unsigned levels_z,unsigned levels_s,unsigned first_layer,unsigned last_layer)258 static void si_blit_decompress_zs_in_place(struct si_context *sctx, struct si_texture *texture,
259 unsigned levels_z, unsigned levels_s,
260 unsigned first_layer, unsigned last_layer)
261 {
262 unsigned both = levels_z & levels_s;
263
264 /* First, do combined Z & S decompresses for levels that need it. */
265 if (both) {
266 si_blit_decompress_zs_planes_in_place(sctx, texture, PIPE_MASK_Z | PIPE_MASK_S, both,
267 first_layer, last_layer);
268 levels_z &= ~both;
269 levels_s &= ~both;
270 }
271
272 /* Now do separate Z and S decompresses. */
273 if (levels_z) {
274 si_blit_decompress_zs_planes_in_place(sctx, texture, PIPE_MASK_Z, levels_z, first_layer,
275 last_layer);
276 }
277
278 if (levels_s) {
279 si_blit_decompress_zs_planes_in_place(sctx, texture, PIPE_MASK_S, levels_s, first_layer,
280 last_layer);
281 }
282 }
283
/* Make the requested Z/S planes of "tex" readable by shaders.
 *
 * Depending on whether the compressed format is directly sampleable
 * (si_can_sample_zs), each dirty plane is either decompressed in place or
 * copied to tex->flushed_depth_texture via the DB->CB path. Dirty-level
 * masks are cleared for whatever was fully processed, and the required
 * cache flushes are queued. Not used on Gfx12 (see the assert).
 */
static void si_decompress_depth(struct si_context *sctx, struct si_texture *tex,
                                unsigned required_planes, unsigned first_level, unsigned last_level,
                                unsigned first_layer, unsigned last_layer)
{
   unsigned inplace_planes = 0;
   unsigned copy_planes = 0;
   unsigned level_mask = u_bit_consecutive(first_level, last_level - first_level + 1);
   unsigned levels_z = 0;
   unsigned levels_s = 0;

   assert(sctx->gfx_level < GFX12);

   /* Route each requested plane to the in-place or the copy path. */
   if (required_planes & PIPE_MASK_Z) {
      levels_z = level_mask & tex->dirty_level_mask;

      if (levels_z) {
         if (si_can_sample_zs(tex, false))
            inplace_planes |= PIPE_MASK_Z;
         else
            copy_planes |= PIPE_MASK_Z;
      }
   }
   if (required_planes & PIPE_MASK_S) {
      levels_s = level_mask & tex->stencil_dirty_level_mask;

      if (levels_s) {
         if (si_can_sample_zs(tex, true))
            inplace_planes |= PIPE_MASK_S;
         else
            copy_planes |= PIPE_MASK_S;
      }
   }

   if (unlikely(sctx->log))
      u_log_printf(sctx->log,
                   "\n------------------------------------------------\n"
                   "Decompress Depth (levels %u - %u, levels Z: 0x%x S: 0x%x)\n\n",
                   first_level, last_level, levels_z, levels_s);

   /* We may have to allocate the flushed texture here when called from
    * si_decompress_subresource.
    */
   if (copy_planes &&
       (tex->flushed_depth_texture || si_init_flushed_depth_texture(&sctx->b, &tex->buffer.b.b))) {
      struct si_texture *dst = tex->flushed_depth_texture;
      unsigned fully_copied_levels;
      unsigned levels = 0;

      assert(tex->flushed_depth_texture);

      /* A combined Z+S destination forces copying both planes. */
      if (util_format_is_depth_and_stencil(dst->buffer.b.b.format))
         copy_planes = PIPE_MASK_Z | PIPE_MASK_S;

      if (copy_planes & PIPE_MASK_Z) {
         levels |= levels_z;
         levels_z = 0;
      }
      if (copy_planes & PIPE_MASK_S) {
         levels |= levels_s;
         levels_s = 0;
      }

      fully_copied_levels = si_blit_dbcb_copy(sctx, tex, dst, copy_planes, levels, first_layer,
                                              last_layer, 0, u_max_sample(&tex->buffer.b.b));

      if (copy_planes & PIPE_MASK_Z)
         tex->dirty_level_mask &= ~fully_copied_levels;
      if (copy_planes & PIPE_MASK_S)
         tex->stencil_dirty_level_mask &= ~fully_copied_levels;
   }

   if (inplace_planes) {
      bool has_htile = si_htile_enabled(tex, first_level, inplace_planes);
      bool tc_compat_htile = vi_tc_compat_htile_enabled(tex, first_level, inplace_planes);

      /* Don't decompress if there is no HTILE or when HTILE is
       * TC-compatible. */
      if (has_htile && !tc_compat_htile) {
         si_blit_decompress_zs_in_place(sctx, tex, levels_z, levels_s, first_layer, last_layer);
      } else {
         /* This is only a cache flush.
          *
          * Only clear the mask that we are flushing, because
          * si_make_DB_shader_coherent() treats different levels
          * and depth and stencil differently.
          */
         if (inplace_planes & PIPE_MASK_Z)
            tex->dirty_level_mask &= ~levels_z;
         if (inplace_planes & PIPE_MASK_S)
            tex->stencil_dirty_level_mask &= ~levels_s;
      }

      /* We just had to completely decompress Z/S for texturing. Enable
       * TC-compatible HTILE on the next clear, so that the decompression
       * doesn't have to be done for this texture ever again.
       *
       * TC-compatible HTILE might slightly reduce Z/S performance, but
       * the decompression is much worse.
       */
      if (has_htile && !tc_compat_htile &&
          /* We can only transition the whole buffer in one clear, so no mipmapping: */
          tex->buffer.b.b.last_level == 0 &&
          tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE &&
          (inplace_planes & PIPE_MASK_Z || !tex->htile_stencil_disabled))
         tex->enable_tc_compatible_htile_next_clear = true;

      /* Only in-place decompression needs to flush DB caches, or
       * when we don't decompress but TC-compatible planes are dirty.
       */
      si_make_DB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, inplace_planes & PIPE_MASK_S,
                                 tc_compat_htile);
   }
   /* set_framebuffer_state takes care of coherency for single-sample.
    * The DB->CB copy uses CB for the final writes.
    */
   if (copy_planes && tex->buffer.b.b.nr_samples > 1)
      si_make_CB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, false, true /* no DCC */);
}
402
si_decompress_sampler_depth_textures(struct si_context * sctx,struct si_samplers * textures)403 static bool si_decompress_sampler_depth_textures(struct si_context *sctx,
404 struct si_samplers *textures)
405 {
406 unsigned i;
407 unsigned mask = textures->needs_depth_decompress_mask;
408 bool need_flush = false;
409
410 assert(sctx->gfx_level < GFX12);
411
412 while (mask) {
413 struct pipe_sampler_view *view;
414 struct si_sampler_view *sview;
415 struct si_texture *tex;
416
417 i = u_bit_scan(&mask);
418
419 view = textures->views[i];
420 assert(view);
421 sview = (struct si_sampler_view *)view;
422
423 tex = (struct si_texture *)view->texture;
424 assert(tex->db_compatible);
425
426 si_decompress_depth(sctx, tex, sview->is_stencil_sampler ? PIPE_MASK_S : PIPE_MASK_Z,
427 view->u.tex.first_level, view->u.tex.last_level, 0,
428 util_max_layer(&tex->buffer.b.b, view->u.tex.first_level));
429
430 if (tex->need_flush_after_depth_decompression) {
431 need_flush = true;
432 tex->need_flush_after_depth_decompression = false;
433 }
434 }
435
436 return need_flush;
437 }
438
/* Decompress color metadata (CMASK/FMASK fast-clear state or DCC) for the
 * given level/layer range of "tex" by drawing with a custom blend state,
 * then optionally expand FMASK to identity with a compute pass.
 *
 * need_dcc_decompress selects the full DCC decompression pass (also used on
 * Gfx11); otherwise the FMASK-decompress or fast-clear-eliminate pass is
 * chosen based on whether the texture has FMASK.
 */
static void si_blit_decompress_color(struct si_context *sctx, struct si_texture *tex,
                                     unsigned first_level, unsigned last_level,
                                     unsigned first_layer, unsigned last_layer,
                                     bool need_dcc_decompress, bool need_fmask_expand)
{
   void *custom_blend;
   unsigned layer, checked_last_layer, max_layer;
   unsigned level_mask = u_bit_consecutive(first_level, last_level - first_level + 1);

   /* No decompression is ever needed on Gfx12. */
   assert(sctx->gfx_level < GFX12);

   if (!need_dcc_decompress)
      level_mask &= tex->dirty_level_mask;
   if (!level_mask)
      goto expand_fmask;

   /* No color decompression is needed on GFX11. */
   assert(sctx->gfx_level < GFX11 || need_dcc_decompress);

   if (unlikely(sctx->log))
      u_log_printf(sctx->log,
                   "\n------------------------------------------------\n"
                   "Decompress Color (levels %u - %u, mask 0x%x)\n\n",
                   first_level, last_level, level_mask);

   /* Pick the custom blend state that implements the needed pass. */
   if (need_dcc_decompress) {
      custom_blend = sctx->custom_blend_dcc_decompress;

      /* DCC_DECOMPRESS and ELIMINATE_FAST_CLEAR require MSAA_NUM_SAMPLES=0. */
      if (sctx->gfx_level >= GFX11) {
         sctx->gfx11_force_msaa_num_samples_zero = true;
         si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
      }

      assert(vi_dcc_enabled(tex, first_level));

      /* disable levels without DCC */
      for (int i = first_level; i <= last_level; i++) {
         if (!vi_dcc_enabled(tex, i))
            level_mask &= ~(1 << i);
      }
   } else if (tex->surface.fmask_size) {
      assert(sctx->gfx_level < GFX11);
      custom_blend = sctx->custom_blend_fmask_decompress;
   } else {
      assert(sctx->gfx_level < GFX11);
      custom_blend = sctx->custom_blend_eliminate_fastclear;
   }

   sctx->decompression_enabled = true;

   while (level_mask) {
      unsigned level = u_bit_scan(&level_mask);

      /* The smaller the mipmap level, the less layers there are
       * as far as 3D textures are concerned. */
      max_layer = util_max_layer(&tex->buffer.b.b, level);
      checked_last_layer = MIN2(last_layer, max_layer);

      for (layer = first_layer; layer <= checked_last_layer; layer++) {
         struct pipe_surface *cbsurf, surf_tmpl;

         surf_tmpl.format = tex->buffer.b.b.format;
         surf_tmpl.u.tex.level = level;
         surf_tmpl.u.tex.first_layer = layer;
         surf_tmpl.u.tex.last_layer = layer;
         cbsurf = sctx->b.create_surface(&sctx->b, &tex->buffer.b.b, &surf_tmpl);

         /* Required before and after FMASK and DCC_DECOMPRESS. */
         if (custom_blend == sctx->custom_blend_fmask_decompress ||
             custom_blend == sctx->custom_blend_dcc_decompress) {
            sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_CB;
            si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
         }

         si_blitter_begin(sctx, SI_DECOMPRESS);
         util_blitter_custom_color(sctx->blitter, cbsurf, custom_blend);
         si_blitter_end(sctx);

         if (custom_blend == sctx->custom_blend_fmask_decompress ||
             custom_blend == sctx->custom_blend_dcc_decompress) {
            sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_CB;
            si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
         }

         /* When running FMASK decompression with DCC, we need to run the "eliminate fast clear" pass
          * separately because FMASK decompression doesn't eliminate DCC fast clear. This makes
          * render->texture transitions more expensive. It can be disabled by
          * allow_dcc_msaa_clear_to_reg_for_bpp.
          *
          * TODO: When we get here, change the compression to TC-compatible on the next clear
          * to disable both the FMASK decompression and fast clear elimination passes.
          */
         if (sctx->screen->allow_dcc_msaa_clear_to_reg_for_bpp[util_logbase2(tex->surface.bpe)] &&
             custom_blend == sctx->custom_blend_fmask_decompress &&
             vi_dcc_enabled(tex, level)) {
            si_blitter_begin(sctx, SI_DECOMPRESS);
            util_blitter_custom_color(sctx->blitter, cbsurf, sctx->custom_blend_eliminate_fastclear);
            si_blitter_end(sctx);
         }

         pipe_surface_reference(&cbsurf, NULL);
      }

      /* The texture will always be dirty if some layers aren't flushed.
       * I don't think this case occurs often though. */
      if (first_layer == 0 && last_layer >= max_layer) {
         tex->dirty_level_mask &= ~(1 << level);
      }
   }

   sctx->decompression_enabled = false;
   si_make_CB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, vi_dcc_enabled(tex, first_level),
                              tex->surface.u.gfx9.color.dcc.pipe_aligned);

   /* Restore gfx11_force_msaa_num_samples_zero. */
   if (sctx->gfx11_force_msaa_num_samples_zero) {
      sctx->gfx11_force_msaa_num_samples_zero = false;
      si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
   }

expand_fmask:
   if (need_fmask_expand && tex->surface.fmask_offset && !tex->fmask_is_identity) {
      assert(sctx->gfx_level < GFX11); /* no FMASK on gfx11 */
      si_compute_expand_fmask(&sctx->b, &tex->buffer.b.b);
      tex->fmask_is_identity = true;
   }
}
568
/* Decompress color metadata for the given mip range of "tex" if the texture
 * actually has any (CMASK buffer, FMASK, or enabled DCC). need_fmask_expand
 * additionally requests an FMASK-to-identity expansion.
 */
static void si_decompress_color_texture(struct si_context *sctx, struct si_texture *tex,
                                        unsigned first_level, unsigned last_level,
                                        bool need_fmask_expand)
{
   assert(sctx->gfx_level < GFX11);

   /* CMASK or DCC can be discarded and we can still end up here. */
   bool has_metadata = tex->cmask_buffer || tex->surface.fmask_size ||
                       vi_dcc_enabled(tex, first_level);
   if (!has_metadata)
      return;

   si_blit_decompress_color(sctx, tex, first_level, last_level, 0,
                            util_max_layer(&tex->buffer.b.b, first_level), false,
                            need_fmask_expand);
}
584
si_decompress_sampler_color_textures(struct si_context * sctx,struct si_samplers * textures)585 static void si_decompress_sampler_color_textures(struct si_context *sctx,
586 struct si_samplers *textures)
587 {
588 unsigned i;
589 unsigned mask = textures->needs_color_decompress_mask;
590
591 assert(sctx->gfx_level < GFX11);
592
593 while (mask) {
594 struct pipe_sampler_view *view;
595 struct si_texture *tex;
596
597 i = u_bit_scan(&mask);
598
599 view = textures->views[i];
600 assert(view);
601
602 tex = (struct si_texture *)view->texture;
603
604 si_decompress_color_texture(sctx, tex, view->u.tex.first_level, view->u.tex.last_level,
605 false);
606 }
607 }
608
si_decompress_image_color_textures(struct si_context * sctx,struct si_images * images)609 static void si_decompress_image_color_textures(struct si_context *sctx, struct si_images *images)
610 {
611 unsigned i;
612 unsigned mask = images->needs_color_decompress_mask;
613
614 assert(sctx->gfx_level < GFX11);
615
616 while (mask) {
617 const struct pipe_image_view *view;
618 struct si_texture *tex;
619
620 i = u_bit_scan(&mask);
621
622 view = &images->views[i];
623 assert(view->resource->target != PIPE_BUFFER);
624
625 tex = (struct si_texture *)view->resource;
626
627 si_decompress_color_texture(sctx, tex, view->u.tex.level, view->u.tex.level,
628 view->access & PIPE_IMAGE_ACCESS_WRITE);
629 }
630 }
631
si_check_render_feedback_texture(struct si_context * sctx,struct si_texture * tex,unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer)632 static void si_check_render_feedback_texture(struct si_context *sctx, struct si_texture *tex,
633 unsigned first_level, unsigned last_level,
634 unsigned first_layer, unsigned last_layer)
635 {
636 bool render_feedback = false;
637
638 assert(sctx->gfx_level < GFX12);
639
640 if (!vi_dcc_enabled(tex, first_level))
641 return;
642
643 for (unsigned j = 0; j < sctx->framebuffer.state.nr_cbufs; ++j) {
644 struct si_surface *surf;
645
646 if (!sctx->framebuffer.state.cbufs[j])
647 continue;
648
649 surf = (struct si_surface *)sctx->framebuffer.state.cbufs[j];
650
651 if (tex == (struct si_texture *)surf->base.texture && surf->base.u.tex.level >= first_level &&
652 surf->base.u.tex.level <= last_level && surf->base.u.tex.first_layer <= last_layer &&
653 surf->base.u.tex.last_layer >= first_layer) {
654 render_feedback = true;
655 break;
656 }
657 }
658
659 if (render_feedback)
660 si_texture_disable_dcc(sctx, tex);
661 }
662
/* Check all enabled, in-use sampler views of one shader stage for render
 * feedback loops.
 */
static void si_check_render_feedback_textures(struct si_context *sctx, struct si_samplers *textures,
                                              uint32_t in_use_mask)
{
   uint32_t remaining = textures->enabled_mask & in_use_mask;

   assert(sctx->gfx_level < GFX12);

   while (remaining) {
      unsigned slot = u_bit_scan(&remaining);

      const struct pipe_sampler_view *view = textures->views[slot];
      if (view->texture->target == PIPE_BUFFER)
         continue;

      struct si_texture *tex = (struct si_texture *)view->texture;

      si_check_render_feedback_texture(sctx, tex, view->u.tex.first_level, view->u.tex.last_level,
                                       view->u.tex.first_layer, view->u.tex.last_layer);
   }
}
686
/* Check all enabled, in-use image views of one shader stage for render
 * feedback loops.
 */
static void si_check_render_feedback_images(struct si_context *sctx, struct si_images *images,
                                            uint32_t in_use_mask)
{
   uint32_t remaining = images->enabled_mask & in_use_mask;

   assert(sctx->gfx_level < GFX12);

   while (remaining) {
      unsigned slot = u_bit_scan(&remaining);

      const struct pipe_image_view *view = &images->views[slot];
      if (view->resource->target == PIPE_BUFFER)
         continue;

      struct si_texture *tex = (struct si_texture *)view->resource;

      si_check_render_feedback_texture(sctx, tex, view->u.tex.level, view->u.tex.level,
                                       view->u.tex.first_layer, view->u.tex.last_layer);
   }
}
710
/* Check all resident (bindless) texture handles for render feedback loops. */
static void si_check_render_feedback_resident_textures(struct si_context *sctx)
{
   assert(sctx->gfx_level < GFX12);

   util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) {
      struct pipe_sampler_view *view;
      struct si_texture *tex;

      view = (*tex_handle)->view;
      /* Buffer views cannot be part of a feedback loop. */
      if (view->texture->target == PIPE_BUFFER)
         continue;

      tex = (struct si_texture *)view->texture;

      si_check_render_feedback_texture(sctx, tex, view->u.tex.first_level, view->u.tex.last_level,
                                       view->u.tex.first_layer, view->u.tex.last_layer);
   }
}
729
/* Check all resident (bindless) image handles for render feedback loops. */
static void si_check_render_feedback_resident_images(struct si_context *sctx)
{
   assert(sctx->gfx_level < GFX12);

   util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) {
      struct pipe_image_view *view;
      struct si_texture *tex;

      view = &(*img_handle)->view;
      /* Buffer views cannot be part of a feedback loop. */
      if (view->resource->target == PIPE_BUFFER)
         continue;

      tex = (struct si_texture *)view->resource;

      si_check_render_feedback_texture(sctx, tex, view->u.tex.level, view->u.tex.level,
                                       view->u.tex.first_layer, view->u.tex.last_layer);
   }
}
748
si_check_render_feedback(struct si_context * sctx)749 static void si_check_render_feedback(struct si_context *sctx)
750 {
751 assert(sctx->gfx_level < GFX12);
752
753 if (!sctx->need_check_render_feedback)
754 return;
755
756 /* There is no render feedback if color writes are disabled.
757 * (e.g. a pixel shader with image stores)
758 */
759 if (!si_any_colorbuffer_written(sctx))
760 return;
761
762 for (int i = 0; i < SI_NUM_GRAPHICS_SHADERS; ++i) {
763 if (!sctx->shaders[i].cso)
764 continue;
765
766 struct si_shader_info *info = &sctx->shaders[i].cso->info;
767 si_check_render_feedback_images(sctx, &sctx->images[i],
768 u_bit_consecutive(0, info->base.num_images));
769 si_check_render_feedback_textures(sctx, &sctx->samplers[i],
770 info->base.textures_used[0]);
771 }
772
773 si_check_render_feedback_resident_images(sctx);
774 si_check_render_feedback_resident_textures(sctx);
775
776 sctx->need_check_render_feedback = false;
777 }
778
/* Color-decompress every resident (bindless) texture that still needs it. */
static void si_decompress_resident_color_textures(struct si_context *sctx)
{
   assert(sctx->gfx_level < GFX11);

   util_dynarray_foreach (&sctx->resident_tex_needs_color_decompress, struct si_texture_handle *,
                          tex_handle) {
      struct pipe_sampler_view *view = (*tex_handle)->view;
      struct si_texture *tex = (struct si_texture *)view->texture;

      si_decompress_color_texture(sctx, tex, view->u.tex.first_level, view->u.tex.last_level,
                                  false);
   }
}
792
/* Depth-decompress every resident (bindless) texture that still needs it. */
static void si_decompress_resident_depth_textures(struct si_context *sctx)
{
   util_dynarray_foreach (&sctx->resident_tex_needs_depth_decompress, struct si_texture_handle *,
                          tex_handle) {
      struct pipe_sampler_view *view = (*tex_handle)->view;
      struct si_sampler_view *sview = (struct si_sampler_view *)view;
      struct si_texture *tex = (struct si_texture *)view->texture;

      /* Stencil samplers only need the S plane; everything else needs Z. */
      si_decompress_depth(sctx, tex, sview->is_stencil_sampler ? PIPE_MASK_S : PIPE_MASK_Z,
                          view->u.tex.first_level, view->u.tex.last_level, 0,
                          util_max_layer(&tex->buffer.b.b, view->u.tex.first_level));
   }
}
806
/* Color-decompress every resident (bindless) image that still needs it.
 * Writable images also get their FMASK expanded.
 */
static void si_decompress_resident_images(struct si_context *sctx)
{
   assert(sctx->gfx_level < GFX11);

   util_dynarray_foreach (&sctx->resident_img_needs_color_decompress, struct si_image_handle *,
                          img_handle) {
      struct pipe_image_view *view = &(*img_handle)->view;
      struct si_texture *tex = (struct si_texture *)view->resource;

      si_decompress_color_texture(sctx, tex, view->u.tex.level, view->u.tex.level,
                                  view->access & PIPE_IMAGE_ACCESS_WRITE);
   }
}
820
/* Pre-draw resource decompression for Gfx6-Gfx10.x.
 *
 * Decompresses all depth and color textures/images bound to the shader
 * stages in "shader_mask" (plus bindless resources) and checks for render
 * feedback loops. No-op while u_blitter itself is drawing, to avoid
 * recursion.
 */
void gfx6_decompress_textures(struct si_context *sctx, unsigned shader_mask)
{
   unsigned compressed_colortex_counter, mask;
   bool need_flush = false;

   if (sctx->blitter_running)
      return;

   /* Update the compressed_colortex_mask if necessary. */
   compressed_colortex_counter = p_atomic_read(&sctx->screen->compressed_colortex_counter);
   if (compressed_colortex_counter != sctx->last_compressed_colortex_counter) {
      sctx->last_compressed_colortex_counter = compressed_colortex_counter;
      si_update_needs_color_decompress_masks(sctx);
   }

   /* Decompress color & depth textures if needed. */
   mask = sctx->shader_needs_decompress_mask & shader_mask;
   while (mask) {
      unsigned i = u_bit_scan(&mask);

      if (sctx->samplers[i].needs_depth_decompress_mask) {
         need_flush |= si_decompress_sampler_depth_textures(sctx, &sctx->samplers[i]);
      }
      if (sctx->samplers[i].needs_color_decompress_mask) {
         si_decompress_sampler_color_textures(sctx, &sctx->samplers[i]);
      }
      if (sctx->images[i].needs_color_decompress_mask) {
         si_decompress_image_color_textures(sctx, &sctx->images[i]);
      }
   }

   if (sctx->gfx_level == GFX10_3 && need_flush) {
      /* This fixes a corruption with the following sequence:
       *   - fast clear depth
       *   - decompress depth
       *   - draw
       * (see https://gitlab.freedesktop.org/drm/amd/-/issues/1810#note_1170171)
       */
      sctx->b.flush(&sctx->b, NULL, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW);
   }

   if (shader_mask & u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS)) {
      /* Graphics path: also handle bindless resources, fbfetch and feedback. */
      if (sctx->uses_bindless_samplers) {
         si_decompress_resident_color_textures(sctx);
         si_decompress_resident_depth_textures(sctx);
      }
      if (sctx->uses_bindless_images)
         si_decompress_resident_images(sctx);

      if (sctx->ps_uses_fbfetch) {
         struct pipe_surface *cb0 = sctx->framebuffer.state.cbufs[0];
         si_decompress_color_texture(sctx, (struct si_texture *)cb0->texture,
                                     cb0->u.tex.first_layer, cb0->u.tex.last_layer, false);
      }

      si_check_render_feedback(sctx);
   } else if (shader_mask & (1 << PIPE_SHADER_COMPUTE)) {
      /* Compute path: only bindless resources of the bound program. */
      if (sctx->cs_shader_state.program->sel.info.uses_bindless_samplers) {
         si_decompress_resident_color_textures(sctx);
         si_decompress_resident_depth_textures(sctx);
      }
      if (sctx->cs_shader_state.program->sel.info.uses_bindless_images)
         si_decompress_resident_images(sctx);
   }
}
886
/* GFX11 variant of texture decompression before sampling.
 * Only depth textures are decompressed here; the asserts below document that
 * color decompression masks are expected to be unset on this path.
 */
void gfx11_decompress_textures(struct si_context *sctx, unsigned shader_mask)
{
   /* Don't re-enter while u_blitter is drawing. */
   if (sctx->blitter_running)
      return;

   /* Decompress depth textures if needed. */
   unsigned mask = sctx->shader_needs_decompress_mask & shader_mask;
   u_foreach_bit(i, mask) {
      /* Only depth decompression can be pending on GFX11. */
      assert(sctx->samplers[i].needs_depth_decompress_mask);
      si_decompress_sampler_depth_textures(sctx, &sctx->samplers[i]);
   }

   /* Decompress bindless depth textures and disable DCC for render feedback. */
   if (shader_mask & u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS)) {
      if (sctx->uses_bindless_samplers)
         si_decompress_resident_depth_textures(sctx);

      si_check_render_feedback(sctx);
   } else if (shader_mask & (1 << PIPE_SHADER_COMPUTE)) {
      /* Compute-only dispatch: use the bindless flags of the bound CS. */
      if (sctx->cs_shader_state.program->sel.info.uses_bindless_samplers)
         si_decompress_resident_depth_textures(sctx);
   }
}
910
/* Helper for decompressing a portion of a color or depth resource before
 * blitting if any decompression is needed.
 * The driver doesn't decompress resources automatically while u_blitter is
 * rendering.
 *
 * planes: PIPE_MASK_* bits selecting which planes to decompress.
 * level / first_layer / last_layer: the subresource range to decompress.
 * need_fmask_expand: forwarded to the color decompress path.
 */
void si_decompress_subresource(struct pipe_context *ctx, struct pipe_resource *tex, unsigned planes,
                               unsigned level, unsigned first_layer, unsigned last_layer,
                               bool need_fmask_expand)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_texture *stex = (struct si_texture *)tex;

   /* GFX12 has no decompression passes. */
   if (sctx->gfx_level >= GFX12)
      return;

   if (stex->db_compatible) {
      /* Depth/stencil path: only Z/S planes are relevant. */
      planes &= PIPE_MASK_Z | PIPE_MASK_S;

      if (!stex->surface.has_stencil)
         planes &= ~PIPE_MASK_S;

      /* If we've rendered into the framebuffer and it's a blitting
       * source, make sure the decompression pass is invoked
       * by dirtying the framebuffer.
       */
      if (sctx->framebuffer.state.zsbuf && sctx->framebuffer.state.zsbuf->u.tex.level == level &&
          sctx->framebuffer.state.zsbuf->texture == tex)
         si_fb_barrier_after_rendering(sctx, SI_FB_BARRIER_SYNC_DB);

      si_decompress_depth(sctx, stex, planes, level, level, first_layer, last_layer);
   } else if (stex->surface.fmask_size || stex->cmask_buffer ||
              vi_dcc_enabled(stex, level)) {
      /* Color path: only needed when FMASK, CMASK, or DCC is present. */
      /* If we've rendered into the framebuffer and it's a blitting
       * source, make sure the decompression pass is invoked
       * by dirtying the framebuffer.
       */
      for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
         if (sctx->framebuffer.state.cbufs[i] &&
             sctx->framebuffer.state.cbufs[i]->u.tex.level == level &&
             sctx->framebuffer.state.cbufs[i]->texture == tex) {
            si_fb_barrier_after_rendering(sctx, SI_FB_BARRIER_SYNC_CB);
            break;
         }
      }

      si_blit_decompress_color(sctx, stex, level, level, first_layer, last_layer, false,
                               need_fmask_expand);
   }
}
959
si_resource_copy_region(struct pipe_context * ctx,struct pipe_resource * dst,unsigned dst_level,unsigned dstx,unsigned dsty,unsigned dstz,struct pipe_resource * src,unsigned src_level,const struct pipe_box * src_box)960 void si_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst,
961 unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz,
962 struct pipe_resource *src, unsigned src_level,
963 const struct pipe_box *src_box)
964 {
965 struct si_context *sctx = (struct si_context *)ctx;
966
967 /* Handle buffers first. */
968 if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
969 si_barrier_before_simple_buffer_op(sctx, 0, dst, src);
970 si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
971 si_barrier_after_simple_buffer_op(sctx, 0, dst, src);
972 return;
973 }
974
975 if (si_compute_copy_image(sctx, dst, dst_level, src, src_level, dstx, dsty, dstz, src_box, true))
976 return;
977
978 si_gfx_copy_image(sctx, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box);
979 }
980
/* Copy a subregion of one image to another using the gfx queue (u_blitter).
 * When direct blitting of the format isn't supported (or floats would lose
 * NaNs), both src and dst views are reinterpreted as same-size UINT formats.
 */
void si_gfx_copy_image(struct si_context *sctx, struct pipe_resource *dst,
                       unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz,
                       struct pipe_resource *src, unsigned src_level,
                       const struct pipe_box *src_box)
{
   struct si_texture *ssrc = (struct si_texture *)src;
   struct pipe_surface *dst_view, dst_templ;
   struct pipe_sampler_view src_templ, *src_view;
   struct pipe_box dstbox;

   /* If the blitter isn't available fail here instead of crashing. */
   if (!sctx->blitter) {
      fprintf(stderr, "si_resource_copy_region failed src_format: %s dst_format: %s\n",
              util_format_name(src->format), util_format_name(dst->format));
      return;
   }

   assert(u_max_sample(dst) == u_max_sample(src));

   /* The driver doesn't decompress resources automatically while
    * u_blitter is rendering. */
   si_decompress_subresource(&sctx->b, src, PIPE_MASK_RGBAZS, src_level, src_box->z,
                             src_box->z + src_box->depth - 1, false);

   util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz);
   util_blitter_default_src_texture(sctx->blitter, &src_templ, src, src_level);

   assert(!util_format_is_compressed(src->format) && !util_format_is_compressed(dst->format));
   assert(!util_format_is_subsampled_422(src->format));

   /* We can't blit as floats because it wouldn't preserve NaNs.
    * Z32_FLOAT needs to keep using floats.
    * Fall back to a same-bpe UINT format chosen from the source block size.
    */
   if ((util_format_is_float(dst_templ.format) &&
        !util_format_is_depth_or_stencil(dst_templ.format)) ||
       !util_blitter_is_copy_supported(sctx->blitter, dst, src)) {
      switch (ssrc->surface.bpe) {
      case 1:
         dst_templ.format = src_templ.format = PIPE_FORMAT_R8_UINT;
         break;
      case 2:
         dst_templ.format = src_templ.format = PIPE_FORMAT_R16_UINT;
         break;
      case 4:
         dst_templ.format = src_templ.format = PIPE_FORMAT_R32_UINT;
         break;
      case 8:
         dst_templ.format = src_templ.format = PIPE_FORMAT_R32G32_UINT;
         break;
      case 16:
         dst_templ.format = src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT;
         break;
      default:
         fprintf(stderr, "Unhandled format %s with blocksize %u\n",
                 util_format_short_name(src->format), ssrc->surface.bpe);
         assert(0);
      }
   }

   /* SNORM blitting has precision issues on some chips. Use the SINT
    * equivalent instead, which doesn't force DCC decompression.
    */
   if (util_format_is_snorm(dst_templ.format))
      dst_templ.format = src_templ.format = util_format_snorm_to_sint(dst_templ.format);

   /* Any format override above may be DCC-incompatible with the stored format. */
   vi_disable_dcc_if_incompatible_format(sctx, dst, dst_level, dst_templ.format);
   vi_disable_dcc_if_incompatible_format(sctx, src, src_level, src_templ.format);

   /* Initialize the surface. */
   dst_view = sctx->b.create_surface(&sctx->b, dst, &dst_templ);

   /* Initialize the sampler view. */
   src_view = sctx->b.create_sampler_view(&sctx->b, src, &src_templ);

   /* abs() because src_box dimensions may be negative to express flips. */
   u_box_3d(dstx, dsty, dstz, abs(src_box->width), abs(src_box->height), abs(src_box->depth),
            &dstbox);

   /* Copy. */
   si_blitter_begin(sctx, SI_COPY);
   util_blitter_blit_generic(sctx->blitter, dst_view, &dstbox, src_view, src_box, src->width0,
                             src->height0, PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL,
                             false, false, 0, NULL);
   si_blitter_end(sctx);

   pipe_surface_reference(&dst_view, NULL);
   pipe_sampler_view_reference(&src_view, NULL);
}
1068
/* Execute a hardware MSAA resolve through the CB using the custom resolve
 * blend state. Caller has already validated that CB_RESOLVE is usable. */
static void si_do_CB_resolve(struct si_context *sctx, const struct pipe_blit_info *info,
                             struct pipe_resource *dst, unsigned dst_level, unsigned dst_z,
                             enum pipe_format format)
{
   /* Required before and after CB_RESOLVE. */
   sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_CB;
   si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);

   /* Keep the render condition active unless the caller disabled it. */
   si_blitter_begin(
      sctx, SI_COLOR_RESOLVE | (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
   util_blitter_custom_resolve_color(sctx->blitter, dst, dst_level, dst_z, info->src.resource,
                                     info->src.box.z, ~0, sctx->custom_blend_resolve, format);
   si_blitter_end(sctx);

   /* Flush caches for possible texturing. */
   si_make_CB_shader_coherent(sctx, 1, false, true /* no DCC */);
}
1086
/* Check whether src can be resolved directly into dst.
 * If the formats only become compatible after swapping RGB<->BGR in src,
 * return true and set *need_rgb_to_bgr; otherwise *need_rgb_to_bgr stays
 * false. When src already swaps RGB to BGR, only the swapped format is
 * considered.
 */
static bool resolve_formats_compatible(enum pipe_format src, enum pipe_format dst,
                                       bool src_swaps_rgb_to_bgr, bool *need_rgb_to_bgr)
{
   const struct util_format_description *dst_desc = util_format_description(dst);

   *need_rgb_to_bgr = false;

   if (src_swaps_rgb_to_bgr) {
      /* We must only check the swapped format. */
      enum pipe_format swapped = util_format_rgb_to_bgr(src);
      assert(swapped);
      return util_is_format_compatible(util_format_description(swapped), dst_desc);
   }

   /* Direct match first. */
   if (util_is_format_compatible(util_format_description(src), dst_desc))
      return true;

   /* Otherwise see if an RGB->BGR swap would make them compatible. */
   *need_rgb_to_bgr =
      util_is_format_compatible(util_format_description(util_format_rgb_to_bgr(src)), dst_desc);
   return *need_rgb_to_bgr;
}
1108
/* Try to execute the blit as a hardware MSAA resolve through the CB
 * (CB_RESOLVE). Returns true if the blit was handled here; false means the
 * caller must take another path. With fail_if_slow set, also rejects cases
 * where CB_RESOLVE is known to be slower than the alternatives.
 */
bool si_msaa_resolve_blit_via_CB(struct pipe_context *ctx, const struct pipe_blit_info *info,
                                 bool fail_if_slow)
{
   struct si_context *sctx = (struct si_context *)ctx;

   /* Gfx11 doesn't have CB_RESOLVE. */
   if (sctx->gfx_level >= GFX11)
      return false;

   struct si_texture *src = (struct si_texture *)info->src.resource;
   struct si_texture *dst = (struct si_texture *)info->dst.resource;
   unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level);
   unsigned dst_height = u_minify(info->dst.resource->height0, info->dst.level);
   enum pipe_format format = info->src.format;
   unsigned num_channels = util_format_description(format)->nr_channels;

   /* Check basic requirements for hw resolve: MSAA src, single-sample dst,
    * no integer/depth formats, single-layer src. */
   if (!(info->src.resource->nr_samples > 1 && info->dst.resource->nr_samples <= 1 &&
         !util_format_is_pure_integer(format) && !util_format_is_depth_or_stencil(format) &&
         util_max_layer(info->src.resource, 0) == 0))
      return false;

   /* Return if this is slower than alternatives. */
   if (fail_if_slow) {
      /* CB_RESOLVE is much slower without FMASK. */
      if (sctx->screen->debug_flags & DBG(NO_FMASK))
         return false;

      /* Verified on: Tahiti, Hawaii, Tonga, Vega10, Navi10, Navi21 */
      switch (sctx->gfx_level) {
      case GFX6:
         return false;

      case GFX7:
         if (src->surface.bpe != 16)
            return false;
         break;

      case GFX8:
      case GFX9:
      case GFX10:
         return false;

      case GFX10_3:
         /* Only profitable for specific bpe/sample-count combinations. */
         if (!(src->surface.bpe == 8 && src->buffer.b.b.nr_samples == 8 && num_channels == 4) &&
             !(src->surface.bpe == 16 && src->buffer.b.b.nr_samples == 4))
            return false;
         break;

      default:
         unreachable("unexpected gfx version");
      }
   }

   /* Hardware MSAA resolve doesn't work if SPI format = NORM16_ABGR and
    * the format is R16G16. Use R16A16, which does work.
    */
   if (format == PIPE_FORMAT_R16G16_UNORM)
      format = PIPE_FORMAT_R16A16_UNORM;
   if (format == PIPE_FORMAT_R16G16_SNORM)
      format = PIPE_FORMAT_R16A16_SNORM;

   bool need_rgb_to_bgr = false;

   /* Check the remaining requirements for hw resolve: full-surface,
    * unscaled, unswizzled copy with compatible formats and a tiled,
    * non-fast-cleared destination. */
   if (util_max_layer(info->dst.resource, info->dst.level) == 0 && !info->scissor_enable &&
       !info->swizzle_enable &&
       (info->mask & PIPE_MASK_RGBA) == PIPE_MASK_RGBA &&
       resolve_formats_compatible(info->src.format, info->dst.format,
                                  src->swap_rgb_to_bgr, &need_rgb_to_bgr) &&
       dst_width == info->src.resource->width0 && dst_height == info->src.resource->height0 &&
       info->dst.box.x == 0 && info->dst.box.y == 0 && info->dst.box.width == dst_width &&
       info->dst.box.height == dst_height && info->dst.box.depth == 1 && info->src.box.x == 0 &&
       info->src.box.y == 0 && info->src.box.width == dst_width &&
       info->src.box.height == dst_height && info->src.box.depth == 1 && !dst->surface.is_linear &&
       (!dst->cmask_buffer || !dst->dirty_level_mask)) { /* dst cannot be fast-cleared */
      /* Check the remaining constraints. */
      if (src->surface.micro_tile_mode != dst->surface.micro_tile_mode ||
          need_rgb_to_bgr) {
         /* Changing the microtile mode is not possible with GFX10. */
         if (sctx->gfx_level >= GFX10)
            return false;

         /* The next fast clear will switch to this mode to
          * get direct hw resolve next time if the mode is
          * different now.
          */
         if (src->surface.micro_tile_mode != dst->surface.micro_tile_mode)
            src->last_msaa_resolve_target_micro_mode = dst->surface.micro_tile_mode;
         if (need_rgb_to_bgr)
            src->swap_rgb_to_bgr_on_next_clear = true;

         return false;
      }

      /* Resolving into a surface with DCC is unsupported. Since
       * it's being overwritten anyway, clear it to uncompressed.
       */
      if (vi_dcc_enabled(dst, info->dst.level)) {
         struct si_clear_info clear_info;

         if (!vi_dcc_get_clear_info(sctx, dst, info->dst.level, DCC_UNCOMPRESSED, &clear_info))
            return false;

         si_barrier_before_image_fast_clear(sctx, SI_CLEAR_TYPE_DCC);
         si_execute_clears(sctx, &clear_info, 1, info->render_condition_enable);
         si_barrier_after_image_fast_clear(sctx);
         dst->dirty_level_mask &= ~(1 << info->dst.level);
      }

      /* Resolve directly from src to dst. */
      si_do_CB_resolve(sctx, info, info->dst.resource, info->dst.level, info->dst.box.z, format);
      return true;
   }

   return false;
}
1226
/* pipe_context::blit entry point. Tries the available paths in order:
 * SDMA / async compute for DRI_PRIME linear destinations, hardware CB
 * resolve, the compute blit, and finally the gfx (u_blitter) path. */
static void si_blit(struct pipe_context *ctx, const struct pipe_blit_info *info)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_texture *sdst = (struct si_texture *)info->dst.resource;

   /* Full-surface, level-0, unscaled copies to a linear DRI_PRIME
    * destination can bypass the gfx queue entirely. */
   if (sctx->gfx_level >= GFX7 &&
       (info->dst.resource->bind & PIPE_BIND_PRIME_BLIT_DST) && sdst->surface.is_linear &&
       /* Use SDMA or async compute when copying to a DRI_PRIME imported linear surface. */
       info->dst.box.x == 0 && info->dst.box.y == 0 && info->dst.box.z == 0 &&
       info->src.box.x == 0 && info->src.box.y == 0 && info->src.box.z == 0 &&
       info->dst.level == 0 && info->src.level == 0 &&
       info->src.box.width == info->dst.resource->width0 &&
       info->src.box.height == info->dst.resource->height0 &&
       info->src.box.depth == 1 &&
       util_can_blit_via_copy_region(info, true, sctx->render_cond != NULL)) {
      struct si_texture *ssrc = (struct si_texture *)info->src.resource;

      /* Try SDMA first... */
      if (si_sdma_copy_image(sctx, sdst, ssrc))
         return;

      /* ... and use async compute as the fallback. */
      struct si_screen *sscreen = sctx->screen;

      /* The aux async compute context is created lazily under the lock. */
      simple_mtx_lock(&sscreen->async_compute_context_lock);
      if (!sscreen->async_compute_context)
         si_init_aux_async_compute_ctx(sscreen);

      if (sscreen->async_compute_context) {
         si_compute_copy_image((struct si_context*)sctx->screen->async_compute_context,
                               info->dst.resource, 0, info->src.resource, 0, 0, 0, 0,
                               &info->src.box, false);
         si_flush_gfx_cs((struct si_context*)sctx->screen->async_compute_context, 0, NULL);
         simple_mtx_unlock(&sscreen->async_compute_context_lock);
         return;
      }

      simple_mtx_unlock(&sscreen->async_compute_context_lock);
   }

   /* Tag the upcoming work for SQTT (thread trace) profiling. */
   if (unlikely(sctx->sqtt_enabled))
      sctx->sqtt_next_event = EventCmdResolveImage;

   if (si_msaa_resolve_blit_via_CB(ctx, info, true))
      return;

   if (unlikely(sctx->sqtt_enabled))
      sctx->sqtt_next_event = EventCmdCopyImage;

   if (si_compute_blit(sctx, info, NULL, 0, 0, true))
      return;

   si_gfx_blit(ctx, info);
}
1281
/* Execute a blit on the gfx queue via u_blitter. For eligible single-sample
 * color destinations with an MSAA source, a custom resolving pixel shader is
 * built (and cached in ps_resolve_shaders) and passed to the blitter. */
void si_gfx_blit(struct pipe_context *ctx, const struct pipe_blit_info *info)
{
   struct si_context *sctx = (struct si_context *)ctx;

   assert(util_blitter_is_blit_supported(sctx->blitter, info));

   /* The driver doesn't decompress resources automatically while
    * u_blitter is rendering. */
   vi_disable_dcc_if_incompatible_format(sctx, info->src.resource, info->src.level,
                                         info->src.format);
   vi_disable_dcc_if_incompatible_format(sctx, info->dst.resource, info->dst.level,
                                         info->dst.format);
   si_decompress_subresource(ctx, info->src.resource, PIPE_MASK_RGBAZS, info->src.level,
                             info->src.box.z, info->src.box.z + info->src.box.depth - 1,
                             false);

   if (unlikely(sctx->sqtt_enabled))
      sctx->sqtt_next_event = EventCmdBlitImage;

   /* Use a custom MSAA resolving pixel shader. */
   void *fs = NULL;
   if (!util_format_is_depth_or_stencil(info->dst.resource->format) &&
       !util_format_is_depth_or_stencil(info->src.resource->format) &&
       !util_format_is_pure_integer(info->dst.format) &&
       info->dst.resource->nr_samples <= 1 &&
       info->src.resource->nr_samples >= 2 &&
       !info->sample0_only &&
       (info->filter == PIPE_TEX_FILTER_NEAREST ||
        /* No scaling */
        (info->dst.box.width == abs(info->src.box.width) &&
         info->dst.box.height == abs(info->src.box.height)))) {
      union ac_ps_resolve_key key;
      key.key = 0;

      /* LLVM is slower on GFX10.3 and older because it doesn't form VMEM clauses and it's more
       * difficult to force them with optimization barriers when FMASK is used.
       */
      key.use_aco = true;
      key.src_is_array = info->src.resource->target == PIPE_TEXTURE_1D_ARRAY ||
                         info->src.resource->target == PIPE_TEXTURE_2D_ARRAY ||
                         info->src.resource->target == PIPE_TEXTURE_CUBE ||
                         info->src.resource->target == PIPE_TEXTURE_CUBE_ARRAY;
      key.log_samples = util_logbase2(info->src.resource->nr_samples);
      /* Don't read more source channels than the destination keeps. */
      key.last_dst_channel = util_format_get_last_component(info->dst.format);
      key.last_src_channel = util_format_get_last_component(info->src.format);
      key.last_src_channel = MIN2(key.last_src_channel, key.last_dst_channel);
      key.x_clamp_to_edge = si_should_blit_clamp_to_edge(info, BITFIELD_BIT(0));
      key.y_clamp_to_edge = si_should_blit_clamp_to_edge(info, BITFIELD_BIT(1));
      /* 16-bit coordinates are usable only when both boxes fit in int16. */
      key.a16 = sctx->gfx_level >= GFX9 && util_is_box_sint16(&info->dst.box) &&
                util_is_box_sint16(&info->src.box);
      unsigned max_dst_chan_size = util_format_get_max_channel_size(info->dst.format);
      unsigned max_src_chan_size = util_format_get_max_channel_size(info->src.format);

      if (key.use_aco && util_format_is_float(info->dst.format) && max_dst_chan_size == 32) {
         /* TODO: ACO doesn't meet precision expectations of this test when the destination format
          * is R32G32B32A32_FLOAT, the source format is R8G8B8A8_UNORM, and the resolving math uses
          * FP16. It's theoretically arguable whether FP16 is legal in this case. LLVM passes
          * the test.
          *
          * piglit/bin/copyteximage CUBE -samples=2 -auto
          */
         key.d16 = 0;
      } else {
         /* Resolving has precision issues all the way down to R11G11B10_FLOAT. */
         key.d16 = ((!key.use_aco && !sctx->screen->use_aco && sctx->gfx_level >= GFX8) ||
                    /* ACO doesn't support D16 on GFX8 */
                    ((key.use_aco || sctx->screen->use_aco) && sctx->gfx_level >= GFX9)) &&
                   MIN2(max_dst_chan_size, max_src_chan_size) <= 10;
      }

      /* Look up / lazily create the resolve shader for this key. */
      fs = _mesa_hash_table_u64_search(sctx->ps_resolve_shaders, key.key);
      if (!fs) {
         struct ac_ps_resolve_options options = {
            .nir_options = sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR,
                                                                PIPE_SHADER_FRAGMENT),
            .info = &sctx->screen->info,
            .use_aco = sctx->screen->use_aco,
            .no_fmask = sctx->screen->debug_flags & DBG(NO_FMASK),
            .print_key = si_can_dump_shader(sctx->screen, MESA_SHADER_FRAGMENT, SI_DUMP_SHADER_KEY),
         };

         fs = si_create_shader_state(sctx, ac_create_resolve_ps(&options, &key));
         _mesa_hash_table_u64_insert(sctx->ps_resolve_shaders, key.key, fs);
      }
   }

   si_blitter_begin(sctx, SI_BLIT | (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
   util_blitter_blit(sctx->blitter, info, fs);
   si_blitter_end(sctx);
}
1372
/* pipe_context::generate_mipmap hook: generate levels
 * [base_level+1, last_level] from base_level via u_blitter.
 * Returns false if the blitter cannot copy this format. */
static bool si_generate_mipmap(struct pipe_context *ctx, struct pipe_resource *tex,
                               enum pipe_format format, unsigned base_level, unsigned last_level,
                               unsigned first_layer, unsigned last_layer)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_texture *stex = (struct si_texture *)tex;

   if (!util_blitter_is_copy_supported(sctx->blitter, tex, tex))
      return false;

   /* The driver doesn't decompress resources automatically while
    * u_blitter is rendering. */
   vi_disable_dcc_if_incompatible_format(sctx, tex, base_level, format);
   si_decompress_subresource(ctx, tex, PIPE_MASK_RGBAZS, base_level, first_layer, last_layer,
                             false);

   /* Clear dirty_level_mask for the levels that will be overwritten. */
   assert(base_level < last_level);
   stex->dirty_level_mask &= ~u_bit_consecutive(base_level + 1, last_level - base_level);

   /* Let state code know the blitter draws are mipmap generation for a
    * depth texture while they run. */
   sctx->generate_mipmap_for_depth = stex->is_depth;

   si_blitter_begin(sctx, SI_BLIT | SI_DISABLE_RENDER_COND);
   util_blitter_generate_mipmap(sctx->blitter, tex, format, base_level, last_level, first_layer,
                                last_layer);
   si_blitter_end(sctx);

   sctx->generate_mipmap_for_depth = false;
   return true;
}
1403
/* pipe_context::flush_resource hook: decompress a color texture
 * (CMASK/DCC) and retile displayable DCC if it's dirty.
 * No-op for buffers and on GFX12. */
static void si_flush_resource(struct pipe_context *ctx, struct pipe_resource *res)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_texture *tex = (struct si_texture *)res;

   if (sctx->gfx_level >= GFX12 || res->target == PIPE_BUFFER)
      return;

   if (!tex->is_depth && (tex->cmask_buffer || vi_dcc_enabled(tex, 0))) {
      /* Decompress all levels and layers. */
      si_blit_decompress_color(sctx, tex, 0, res->last_level, 0, util_max_layer(res, 0),
                               false, false);

      if (tex->surface.display_dcc_offset && tex->displayable_dcc_dirty) {
         si_retile_dcc(sctx, tex);
         tex->displayable_dcc_dirty = false;
      }
   }
}
1422
/* Flush every resource recorded in dirty_implicit_resources, drop the
 * references held by the table, and empty it. Pre-GFX12 only. */
void si_flush_implicit_resources(struct si_context *sctx)
{
   assert(sctx->gfx_level < GFX12);

   hash_table_foreach(sctx->dirty_implicit_resources, entry) {
      si_flush_resource(&sctx->b, entry->data);
      /* The table owned a reference to each resource; release it. */
      pipe_resource_reference((struct pipe_resource **)&entry->data, NULL);
   }
   _mesa_hash_table_clear(sctx->dirty_implicit_resources, NULL);
}
1433
/* Fully decompress DCC for a color texture (all levels and layers). */
void si_decompress_dcc(struct si_context *sctx, struct si_texture *tex)
{
   assert(sctx->gfx_level < GFX12);
   assert(!tex->is_depth);

   /* If graphics is disabled, we can't decompress DCC, but it shouldn't
    * be compressed either. The caller should simply discard it.
    * If blitter is running, we can't decompress DCC either because it
    * will cause a blitter recursion.
    */
   if (!tex->surface.meta_offset || !sctx->has_graphics || sctx->blitter_running)
      return;

   si_blit_decompress_color(sctx, tex, 0, tex->buffer.b.b.last_level, 0,
                            util_max_layer(&tex->buffer.b.b, 0), true, false);
}
1450
si_init_blit_functions(struct si_context * sctx)1451 void si_init_blit_functions(struct si_context *sctx)
1452 {
1453 sctx->b.resource_copy_region = si_resource_copy_region;
1454
1455 if (sctx->has_graphics) {
1456 sctx->b.blit = si_blit;
1457 sctx->b.flush_resource = si_flush_resource;
1458 sctx->b.generate_mipmap = si_generate_mipmap;
1459 }
1460 }
1461