• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2017 Advanced Micro Devices, Inc.
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "si_pipe.h"
8 #include "sid.h"
9 #include "util/format/u_format.h"
10 #include "util/u_pack_color.h"
11 #include "util/u_surface.h"
12 #include "ac_formats.h"
13 
14 enum {
15    SI_CLEAR = SI_SAVE_FRAGMENT_STATE | SI_SAVE_FRAGMENT_CONSTANT,
16    SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE,
17 };
18 
si_init_buffer_clear(struct si_clear_info * info,struct pipe_resource * resource,uint64_t offset,uint32_t size,uint32_t clear_value)19 void si_init_buffer_clear(struct si_clear_info *info,
20                           struct pipe_resource *resource, uint64_t offset,
21                           uint32_t size, uint32_t clear_value)
22 {
23    info->resource = resource;
24    info->offset = offset;
25    info->size = size;
26    info->clear_value = clear_value;
27    info->writemask = 0xffffffff;
28    info->is_dcc_msaa = false;
29    info->format = PIPE_FORMAT_NONE;
30 }
31 
si_init_buffer_clear_rmw(struct si_clear_info * info,struct pipe_resource * resource,uint64_t offset,uint32_t size,uint32_t clear_value,uint32_t writemask)32 static void si_init_buffer_clear_rmw(struct si_clear_info *info,
33                                      struct pipe_resource *resource, uint64_t offset,
34                                      uint32_t size, uint32_t clear_value, uint32_t writemask)
35 {
36    si_init_buffer_clear(info, resource, offset, size, clear_value);
37    info->writemask = writemask;
38    info->format = PIPE_FORMAT_NONE;
39 }
40 
si_init_clear_image_dcc_single(struct si_clear_info * info,struct si_texture * tex,unsigned level,enum pipe_format format,const union pipe_color_union * color)41 static void si_init_clear_image_dcc_single(struct si_clear_info *info, struct si_texture *tex,
42                                            unsigned level, enum pipe_format format,
43                                            const union pipe_color_union *color)
44 {
45    info->resource = &tex->buffer.b.b;
46    info->level = level;
47    info->format = format;
48    memcpy(&info->color, color, sizeof(info->color));
49 }
50 
si_execute_clears(struct si_context * sctx,struct si_clear_info * info,unsigned num_clears,bool render_condition_enable)51 void si_execute_clears(struct si_context *sctx, struct si_clear_info *info,
52                        unsigned num_clears, bool render_condition_enable)
53 {
54    assert(num_clears);
55 
56    /* Execute clears. */
57    for (unsigned i = 0; i < num_clears; i++) {
58       if (info[i].format) {
59          si_compute_clear_image_dcc_single(sctx, (struct si_texture*)info[i].resource,
60                                            info[i].level, info[i].format, &info[i].color,
61                                            render_condition_enable);
62          continue;
63       }
64 
65       if (info[i].is_dcc_msaa) {
66          gfx9_clear_dcc_msaa(sctx, info[i].resource, info[i].clear_value, render_condition_enable);
67          continue;
68       }
69 
70       assert(info[i].size > 0);
71 
72       if (info[i].writemask != 0xffffffff) {
73          si_compute_clear_buffer_rmw(sctx, info[i].resource, info[i].offset, info[i].size,
74                                      info[i].clear_value, info[i].writemask,
75                                      render_condition_enable);
76       } else {
77          /* Compute shaders are much faster on both dGPUs and APUs. Don't use CP DMA. */
78          si_clear_buffer(sctx, info[i].resource, info[i].offset, info[i].size,
79                          &info[i].clear_value, 4, SI_COMPUTE_CLEAR_METHOD,
80                          render_condition_enable);
81       }
82    }
83 }
84 
si_alloc_separate_cmask(struct si_screen * sscreen,struct si_texture * tex)85 static bool si_alloc_separate_cmask(struct si_screen *sscreen, struct si_texture *tex)
86 {
87    assert(sscreen->info.gfx_level < GFX11);
88 
89    /* CMASK for MSAA is allocated in advance or always disabled
90     * by "nofmask" option.
91     */
92    if (tex->cmask_buffer)
93       return true;
94 
95    if (!tex->surface.cmask_size)
96       return false;
97 
98    tex->cmask_buffer =
99       si_aligned_buffer_create(&sscreen->b, PIPE_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT,
100                                tex->surface.cmask_size, 1 << tex->surface.cmask_alignment_log2);
101    if (tex->cmask_buffer == NULL)
102       return false;
103 
104    /* These 2 fields are part of the framebuffer state but dirtying the atom
105     * will be done by the caller.
106     */
107    tex->cmask_base_address_reg = tex->cmask_buffer->gpu_address >> 8;
108    tex->cb_color_info |= S_028C70_FAST_CLEAR(1);
109 
110    p_atomic_inc(&sscreen->compressed_colortex_counter);
111    return true;
112 }
113 
si_set_clear_color(struct si_texture * tex,enum pipe_format surface_format,const union pipe_color_union * color)114 static bool si_set_clear_color(struct si_texture *tex, enum pipe_format surface_format,
115                                const union pipe_color_union *color)
116 {
117    union util_color uc;
118 
119    memset(&uc, 0, sizeof(uc));
120 
121    if (tex->surface.bpe == 16) {
122       /* DCC fast clear only:
123        *   CLEAR_WORD0 = R = G = B
124        *   CLEAR_WORD1 = A
125        */
126       assert(color->ui[0] == color->ui[1] && color->ui[0] == color->ui[2]);
127       uc.ui[0] = color->ui[0];
128       uc.ui[1] = color->ui[3];
129    } else {
130       if (tex->swap_rgb_to_bgr)
131          surface_format = util_format_rgb_to_bgr(surface_format);
132 
133       util_pack_color_union(surface_format, &uc, color);
134    }
135 
136    if (memcmp(tex->color_clear_value, &uc, 2 * sizeof(uint32_t)) == 0)
137       return false;
138 
139    memcpy(tex->color_clear_value, &uc, 2 * sizeof(uint32_t));
140    return true;
141 }
142 
gfx8_get_dcc_clear_parameters(struct si_screen * sscreen,enum pipe_format base_format,enum pipe_format surface_format,const union pipe_color_union * color,uint32_t * clear_value,bool * eliminate_needed)143 static bool gfx8_get_dcc_clear_parameters(struct si_screen *sscreen, enum pipe_format base_format,
144                                           enum pipe_format surface_format,
145                                           const union pipe_color_union *color, uint32_t *clear_value,
146                                           bool *eliminate_needed)
147 {
148    /* If we want to clear without needing a fast clear eliminate step, we
149     * can set color and alpha independently to 0 or 1 (or 0/max for integer
150     * formats).
151     */
152    bool values[4] = {};      /* whether to clear to 0 or 1 */
153    bool color_value = false; /* clear color to 0 or 1 */
154    bool alpha_value = false; /* clear alpha to 0 or 1 */
155    int alpha_channel;        /* index of the alpha component */
156    bool has_color = false;
157    bool has_alpha = false;
158 
159    const struct util_format_description *desc =
160       util_format_description(ac_simplify_cb_format(surface_format));
161 
162    /* 128-bit fast clear with different R,G,B values is unsupported. */
163    if (desc->block.bits == 128 && (color->ui[0] != color->ui[1] || color->ui[0] != color->ui[2]))
164       return false;
165 
166    *eliminate_needed = true;
167    *clear_value = GFX8_DCC_CLEAR_REG;
168 
169    if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
170       return true; /* need ELIMINATE_FAST_CLEAR */
171 
172    bool base_alpha_is_on_msb = ac_alpha_is_on_msb(&sscreen->info, base_format);
173    bool surf_alpha_is_on_msb = ac_alpha_is_on_msb(&sscreen->info, surface_format);
174 
175    /* Formats with 3 channels can't have alpha. */
176    if (desc->nr_channels == 3)
177       alpha_channel = -1;
178    else if (surf_alpha_is_on_msb)
179       alpha_channel = desc->nr_channels - 1;
180    else
181       alpha_channel = 0;
182 
183    for (int i = 0; i < 4; ++i) {
184       if (desc->swizzle[i] >= PIPE_SWIZZLE_0)
185          continue;
186 
187       if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
188          /* Use the maximum value for clamping the clear color. */
189          int max = u_bit_consecutive(0, desc->channel[i].size - 1);
190 
191          values[i] = color->i[i] != 0;
192          if (color->i[i] != 0 && MIN2(color->i[i], max) != max)
193             return true; /* need ELIMINATE_FAST_CLEAR */
194       } else if (desc->channel[i].pure_integer &&
195                  desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
196          /* Use the maximum value for clamping the clear color. */
197          unsigned max = u_bit_consecutive(0, desc->channel[i].size);
198 
199          values[i] = color->ui[i] != 0U;
200          if (color->ui[i] != 0U && MIN2(color->ui[i], max) != max)
201             return true; /* need ELIMINATE_FAST_CLEAR */
202       } else {
203          values[i] = color->f[i] != 0.0F;
204          if (color->f[i] != 0.0F && color->f[i] != 1.0F)
205             return true; /* need ELIMINATE_FAST_CLEAR */
206       }
207 
208       if (desc->swizzle[i] == alpha_channel) {
209          alpha_value = values[i];
210          has_alpha = true;
211       } else {
212          color_value = values[i];
213          has_color = true;
214       }
215    }
216 
217    /* If alpha isn't present, make it the same as color, and vice versa. */
218    if (!has_alpha)
219       alpha_value = color_value;
220    else if (!has_color)
221       color_value = alpha_value;
222 
223    if (color_value != alpha_value && base_alpha_is_on_msb != surf_alpha_is_on_msb)
224       return true; /* require ELIMINATE_FAST_CLEAR */
225 
226    /* Check if all color values are equal if they are present. */
227    for (int i = 0; i < 4; ++i) {
228       if (desc->swizzle[i] <= PIPE_SWIZZLE_W && desc->swizzle[i] != alpha_channel &&
229           values[i] != color_value)
230          return true; /* require ELIMINATE_FAST_CLEAR */
231    }
232 
233    /* This doesn't need ELIMINATE_FAST_CLEAR.
234     * On chips predating Raven2, the DCC clear codes and the CB clear
235     * color registers must match.
236     */
237    *eliminate_needed = false;
238 
239    if (color_value) {
240       if (alpha_value)
241          *clear_value = GFX8_DCC_CLEAR_1111;
242       else
243          *clear_value = GFX8_DCC_CLEAR_1110;
244    } else {
245       if (alpha_value)
246          *clear_value = GFX8_DCC_CLEAR_0001;
247       else
248          *clear_value = GFX8_DCC_CLEAR_0000;
249    }
250    return true;
251 }
252 
gfx11_get_dcc_clear_parameters(struct si_screen * sscreen,struct si_texture * tex,unsigned level,enum pipe_format surface_format,const union pipe_color_union * color,uint32_t * clear_value,bool fail_if_slow)253 static bool gfx11_get_dcc_clear_parameters(struct si_screen *sscreen, struct si_texture *tex,
254                                            unsigned level, enum pipe_format surface_format,
255                                            const union pipe_color_union *color, uint32_t *clear_value,
256                                            bool fail_if_slow)
257 {
258    const struct util_format_description *desc =
259       util_format_description(ac_simplify_cb_format(surface_format));
260    unsigned start_bit = UINT_MAX;
261    unsigned end_bit = 0;
262 
263    /* Find the used bit range. */
264    for (unsigned i = 0; i < 4; i++) {
265       unsigned swizzle = desc->swizzle[i];
266 
267       if (swizzle >= PIPE_SWIZZLE_0)
268          continue;
269 
270       start_bit = MIN2(start_bit, desc->channel[swizzle].shift);
271       end_bit = MAX2(end_bit, desc->channel[swizzle].shift + desc->channel[swizzle].size);
272    }
273 
274    union {
275       uint8_t ub[16];
276       uint16_t us[8];
277       uint32_t ui[4];
278    } value = {};
279    util_pack_color_union(surface_format, (union util_color*)&value, color);
280 
281    /* Check the cases where all components or bits are either all 0 or all 1. */
282    bool all_bits_are_0 = true;
283    bool all_bits_are_1 = true;
284    bool all_words_are_fp16_1 = false;
285    bool all_words_are_fp32_1 = false;
286 
287    for (unsigned i = start_bit; i < end_bit; i++) {
288       bool bit = value.ub[i / 8] & BITFIELD_BIT(i % 8);
289 
290       all_bits_are_0 &= !bit;
291       all_bits_are_1 &= bit;
292    }
293 
294    if (start_bit % 16 == 0 && end_bit % 16 == 0) {
295       all_words_are_fp16_1 = true;
296       for (unsigned i = start_bit / 16; i < end_bit / 16; i++)
297          all_words_are_fp16_1 &= value.us[i] == 0x3c00;
298    }
299 
300    if (start_bit % 32 == 0 && end_bit % 32 == 0) {
301       all_words_are_fp32_1 = true;
302       for (unsigned i = start_bit / 32; i < end_bit / 32; i++)
303          all_words_are_fp32_1 &= value.ui[i] == 0x3f800000;
304    }
305 
306 #if 0 /* debug code */
307    int i = util_format_get_first_non_void_channel(surface_format);
308    if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED && desc->channel[i].pure_integer) {
309       printf("%i %i %i %i\n", color->i[0], color->i[1], color->i[2], color->i[3]);
310    } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED && desc->channel[i].pure_integer) {
311       printf("%u %u %u %u\n", color->ui[0], color->ui[1], color->ui[2], color->ui[3]);
312    } else {
313       printf("%f %f %f %f\n", color->f[0], color->f[1], color->f[2], color->f[3]);
314    }
315    for (unsigned i = 0; i < end_bit / 8; i++)
316       printf("%02x", value.ub[i]);
317    printf("\n");
318    printf("bits=[%u..%u)%s%s%s%s\n", start_bit, end_bit,
319           all_bits_are_0 ? ", all 0" : "",
320           all_bits_are_1 ? ", all 1" : "",
321           all_words_are_fp16_1 ? ", all fp16 1" : "",
322           all_words_are_fp32_1 ? ", all fp32 1" : "");
323 #endif
324 
325    *clear_value = 0;
326 
327    if (all_bits_are_0 || all_bits_are_1 || all_words_are_fp16_1 || all_words_are_fp32_1) {
328       if (all_bits_are_0)
329          *clear_value = GFX11_DCC_CLEAR_0000;
330       else if (all_bits_are_1)
331          *clear_value = GFX11_DCC_CLEAR_1111_UNORM;
332       else if (all_words_are_fp16_1)
333          *clear_value = GFX11_DCC_CLEAR_1111_FP16;
334       else if (all_words_are_fp32_1)
335          *clear_value = GFX11_DCC_CLEAR_1111_FP32;
336 
337       return true;
338    }
339 
340    /* Check 0001 and 1110 cases. */
341    if (desc->nr_channels == 2 && desc->channel[0].size == 8) {
342       if (value.ub[0] == 0x00 && value.ub[1] == 0xff) {
343          *clear_value = GFX11_DCC_CLEAR_0001_UNORM;
344          return true;
345       } else if (value.ub[0] == 0xff && value.ub[1] == 0x00) {
346          *clear_value = GFX11_DCC_CLEAR_1110_UNORM;
347          return true;
348       }
349    } else if (desc->nr_channels == 4 && desc->channel[0].size == 8) {
350       if (value.ub[0] == 0x00 && value.ub[1] == 0x00 &&
351           value.ub[2] == 0x00 && value.ub[3] == 0xff) {
352          *clear_value = GFX11_DCC_CLEAR_0001_UNORM;
353          return true;
354       } else if (value.ub[0] == 0xff && value.ub[1] == 0xff &&
355                  value.ub[2] == 0xff && value.ub[3] == 0x00) {
356          *clear_value = GFX11_DCC_CLEAR_1110_UNORM;
357          return true;
358       }
359    } else if (desc->nr_channels == 4 && desc->channel[0].size == 16) {
360       if (value.us[0] == 0x0000 && value.us[1] == 0x0000 &&
361           value.us[2] == 0x0000 && value.us[3] == 0xffff) {
362          *clear_value = GFX11_DCC_CLEAR_0001_UNORM;
363          return true;
364       } else if (value.us[0] == 0xffff && value.us[1] == 0xffff &&
365                  value.us[2] == 0xffff && value.us[3] == 0x0000) {
366          *clear_value = GFX11_DCC_CLEAR_1110_UNORM;
367          return true;
368       }
369    }
370 
371    /* Estimate whether DCC clear-to-single is better than a slow clear. */
372    unsigned width = u_minify(tex->buffer.b.b.width0, level);
373    unsigned height = u_minify(tex->buffer.b.b.height0, level);
374    unsigned depth = util_num_layers(&tex->buffer.b.b, level);
375    unsigned num_samples = MAX2(tex->buffer.b.b.nr_samples, 1);
376    uint64_t size = (uint64_t)width * height * depth * num_samples * tex->surface.bpe;
377 
378    /* These cases perform exceptionally well with DCC clear-to-single, so make them more likely. */
379    if ((num_samples <= 2 && tex->surface.bpe <= 2) ||
380        (num_samples == 1 && tex->surface.bpe == 4))
381       size *= 2;
382 
383    /* These cases perform terribly with DCC clear-to-single. */
384    if (tex->buffer.b.b.nr_samples >= 4 && tex->surface.bpe >= 4)
385       size = 0;
386 
387    /* This is mostly optimal for Navi31. The scaling effect of num_rb on other chips is guessed. */
388    if (!fail_if_slow || size >= sscreen->info.num_rb * 512 * 1024) {
389       *clear_value = GFX11_DCC_CLEAR_SINGLE;
390       return true;
391    }
392 
393    return false;
394 }
395 
vi_dcc_get_clear_info(struct si_context * sctx,struct si_texture * tex,unsigned level,unsigned clear_value,struct si_clear_info * out)396 bool vi_dcc_get_clear_info(struct si_context *sctx, struct si_texture *tex, unsigned level,
397                            unsigned clear_value, struct si_clear_info *out)
398 {
399    struct pipe_resource *dcc_buffer = &tex->buffer.b.b;
400    uint64_t dcc_offset = tex->surface.meta_offset;
401    uint32_t clear_size;
402 
403    assert(vi_dcc_enabled(tex, level));
404 
405    if (sctx->gfx_level >= GFX10) {
406       /* 4x and 8x MSAA needs a sophisticated compute shader for
407        * the clear. GFX11 doesn't need that.
408        */
409       if (sctx->gfx_level < GFX11 && tex->buffer.b.b.nr_storage_samples >= 4)
410          return false;
411 
412       unsigned num_layers = util_num_layers(&tex->buffer.b.b, level);
413 
414       if (num_layers == 1) {
415          /* Clear a specific level. */
416          dcc_offset += tex->surface.u.gfx9.meta_levels[level].offset;
417          clear_size = tex->surface.u.gfx9.meta_levels[level].size;
418       } else if (tex->buffer.b.b.last_level == 0) {
419          /* Clear all layers having only 1 level. */
420          clear_size = tex->surface.meta_size;
421       } else {
422          /* Clearing DCC with both multiple levels and multiple layers is not
423           * implemented.
424           */
425          return false;
426       }
427    } else if (sctx->gfx_level == GFX9) {
428       /* TODO: Implement DCC fast clear for level 0 of mipmapped textures. Mipmapped
429        * DCC has to clear a rectangular area of DCC for level 0 (because the whole miptree
430        * is organized in a 2D plane).
431        */
432       if (tex->buffer.b.b.last_level > 0)
433          return false;
434 
435       /* 4x and 8x MSAA need to clear only sample 0 and 1 in a compute shader and leave other
436        * samples untouched. (only the first 2 samples are compressed) */
437       if (tex->buffer.b.b.nr_storage_samples >= 4) {
438          si_init_buffer_clear(out, dcc_buffer, 0, 0, clear_value);
439          out->is_dcc_msaa = true;
440          return true;
441       }
442 
443       clear_size = tex->surface.meta_size;
444    } else {
445       unsigned num_layers = util_num_layers(&tex->buffer.b.b, level);
446 
447       /* If this is 0, fast clear isn't possible. (can occur with MSAA) */
448       if (!tex->surface.u.legacy.color.dcc_level[level].dcc_fast_clear_size)
449          return false;
450 
451       /* Layered 4x and 8x MSAA DCC fast clears need to clear
452        * dcc_fast_clear_size bytes for each layer. A compute shader
453        * would be more efficient than separate per-layer clear operations.
454        */
455       if (tex->buffer.b.b.nr_storage_samples >= 4 && num_layers > 1)
456          return false;
457 
458       dcc_offset += tex->surface.u.legacy.color.dcc_level[level].dcc_offset;
459       clear_size = tex->surface.u.legacy.color.dcc_level[level].dcc_fast_clear_size;
460    }
461 
462    si_init_buffer_clear(out, dcc_buffer, dcc_offset, clear_size, clear_value);
463    return true;
464 }
465 
466 /* Set the same micro tile mode as the destination of the last MSAA resolve.
467  * This allows hitting the MSAA resolve fast path, which requires that both
468  * src and dst micro tile modes match.
469  */
si_set_optimal_micro_tile_mode(struct si_screen * sscreen,struct si_texture * tex)470 static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen, struct si_texture *tex)
471 {
472    if (sscreen->info.gfx_level >= GFX10 || tex->buffer.b.is_shared ||
473        tex->buffer.b.b.nr_samples <= 1 ||
474        tex->surface.micro_tile_mode == tex->last_msaa_resolve_target_micro_mode)
475       return;
476 
477    assert(sscreen->info.gfx_level >= GFX9 ||
478           tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
479    assert(tex->buffer.b.b.last_level == 0);
480 
481    if (sscreen->info.gfx_level >= GFX9) {
482       /* 4K or larger tiles only. 0 is linear. 1-3 are 256B tiles. */
483       assert(tex->surface.u.gfx9.swizzle_mode >= 4);
484 
485       /* If you do swizzle_mode % 4, you'll get:
486        *   0 = Depth
487        *   1 = Standard,
488        *   2 = Displayable
489        *   3 = Rotated
490        *
491        * Depth-sample order isn't allowed:
492        */
493       assert(tex->surface.u.gfx9.swizzle_mode % 4 != 0);
494 
495       switch (tex->last_msaa_resolve_target_micro_mode) {
496       case RADEON_MICRO_MODE_DISPLAY:
497          tex->surface.u.gfx9.swizzle_mode &= ~0x3;
498          tex->surface.u.gfx9.swizzle_mode += 2; /* D */
499          break;
500       case RADEON_MICRO_MODE_STANDARD:
501          tex->surface.u.gfx9.swizzle_mode &= ~0x3;
502          tex->surface.u.gfx9.swizzle_mode += 1; /* S */
503          break;
504       case RADEON_MICRO_MODE_RENDER:
505          tex->surface.u.gfx9.swizzle_mode &= ~0x3;
506          tex->surface.u.gfx9.swizzle_mode += 3; /* R */
507          break;
508       default: /* depth */
509          assert(!"unexpected micro mode");
510          return;
511       }
512    } else if (sscreen->info.gfx_level >= GFX7) {
513       /* These magic numbers were copied from addrlib. It doesn't use
514        * any definitions for them either. They are all 2D_TILED_THIN1
515        * modes with different bpp and micro tile mode.
516        */
517       switch (tex->last_msaa_resolve_target_micro_mode) {
518       case RADEON_MICRO_MODE_DISPLAY:
519          tex->surface.u.legacy.tiling_index[0] = 10;
520          break;
521       case RADEON_MICRO_MODE_STANDARD:
522          tex->surface.u.legacy.tiling_index[0] = 14;
523          break;
524       case RADEON_MICRO_MODE_RENDER:
525          tex->surface.u.legacy.tiling_index[0] = 28;
526          break;
527       default: /* depth, thick */
528          assert(!"unexpected micro mode");
529          return;
530       }
531    } else { /* GFX6 */
532       switch (tex->last_msaa_resolve_target_micro_mode) {
533       case RADEON_MICRO_MODE_DISPLAY:
534          switch (tex->surface.bpe) {
535          case 1:
536             tex->surface.u.legacy.tiling_index[0] = 10;
537             break;
538          case 2:
539             tex->surface.u.legacy.tiling_index[0] = 11;
540             break;
541          default: /* 4, 8 */
542             tex->surface.u.legacy.tiling_index[0] = 12;
543             break;
544          }
545          break;
546       case RADEON_MICRO_MODE_STANDARD:
547          switch (tex->surface.bpe) {
548          case 1:
549             tex->surface.u.legacy.tiling_index[0] = 14;
550             break;
551          case 2:
552             tex->surface.u.legacy.tiling_index[0] = 15;
553             break;
554          case 4:
555             tex->surface.u.legacy.tiling_index[0] = 16;
556             break;
557          default: /* 8, 16 */
558             tex->surface.u.legacy.tiling_index[0] = 17;
559             break;
560          }
561          break;
562       default: /* depth, thick */
563          assert(!"unexpected micro mode");
564          return;
565       }
566    }
567 
568    tex->surface.micro_tile_mode = tex->last_msaa_resolve_target_micro_mode;
569 
570    p_atomic_inc(&sscreen->dirty_tex_counter);
571 }
572 
si_get_htile_clear_value(struct si_texture * tex,float depth)573 static uint32_t si_get_htile_clear_value(struct si_texture *tex, float depth)
574 {
575    /* Maximum 14-bit UINT value. */
576    const uint32_t max_z_value = 0x3FFF;
577 
578    /* For clears, Zmask and Smem will always be set to zero. */
579    const uint32_t zmask = 0;
580    const uint32_t smem  = 0;
581 
582    /* Convert depthValue to 14-bit zmin/zmax uint values. */
583    const uint32_t zmin = lroundf(depth * max_z_value);
584    const uint32_t zmax = zmin;
585 
586    if (tex->htile_stencil_disabled) {
587       /* Z-only HTILE is laid out as follows:
588        * |31     18|17      4|3     0|
589        * +---------+---------+-------+
590        * |  Max Z  |  Min Z  | ZMask |
591        */
592       return ((zmax & 0x3FFF) << 18) |
593              ((zmin & 0x3FFF) << 4) |
594              ((zmask & 0xF) << 0);
595    } else {
596       /* Z+S HTILE is laid out as-follows:
597        * |31       12|11 10|9    8|7   6|5   4|3     0|
598        * +-----------+-----+------+-----+-----+-------+
599        * |  Z Range  |     | SMem | SR1 | SR0 | ZMask |
600        *
601        * The base value for zRange is either zMax or zMin, depending on ZRANGE_PRECISION.
602        * For a fast clear, zMin == zMax == clearValue. This means that the base will
603        * always be the clear value (converted to 14-bit UINT).
604        *
605        * When abs(zMax-zMin) < 16, the delta is equal to the difference. In the case of
606        * fast clears, where zMax == zMin, the delta is always zero.
607        */
608       const uint32_t delta = 0;
609       const uint32_t zrange = (zmax << 6) | delta;
610 
611       /* SResults 0 & 1 are set based on the stencil compare state.
612        * For fast-clear, the default value of sr0 and sr1 are both 0x3.
613        */
614       const uint32_t sresults = 0xf;
615 
616       return ((zrange & 0xFFFFF) << 12) |
617              ((smem & 0x3) <<  8) |
618              ((sresults & 0xF) <<  4) |
619              ((zmask & 0xF) <<  0);
620    }
621 }
622 
si_can_fast_clear_depth(struct si_texture * zstex,unsigned level,float depth,unsigned buffers)623 static bool si_can_fast_clear_depth(struct si_texture *zstex, unsigned level, float depth,
624                                     unsigned buffers)
625 {
626    /* TC-compatible HTILE only supports depth clears to 0 or 1. */
627    return buffers & PIPE_CLEAR_DEPTH &&
628           si_htile_enabled(zstex, level, PIPE_MASK_Z) &&
629           (!zstex->tc_compatible_htile || depth == 0 || depth == 1);
630 }
631 
si_can_fast_clear_stencil(struct si_texture * zstex,unsigned level,uint8_t stencil,unsigned buffers)632 static bool si_can_fast_clear_stencil(struct si_texture *zstex, unsigned level, uint8_t stencil,
633                                       unsigned buffers)
634 {
635    /* TC-compatible HTILE only supports stencil clears to 0. */
636    return buffers & PIPE_CLEAR_STENCIL &&
637           si_htile_enabled(zstex, level, PIPE_MASK_S) &&
638           (!zstex->tc_compatible_htile || stencil == 0);
639 }
640 
si_fast_clear(struct si_context * sctx,unsigned * buffers,const union pipe_color_union * color,float depth,uint8_t stencil)641 static void si_fast_clear(struct si_context *sctx, unsigned *buffers,
642                           const union pipe_color_union *color, float depth, uint8_t stencil)
643 {
644    struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
645    struct si_clear_info info[8 * 3 + 1]; /* MRTs * (CMASK + DCC + clear_dcc_single) + ZS */
646    unsigned num_clears = 0;
647    unsigned clear_types = 0;
648    unsigned num_pixels = fb->width * fb->height;
649 
650    assert(sctx->gfx_level < GFX12);
651 
652    /* This function is broken in BE, so just disable this path for now */
653 #if UTIL_ARCH_BIG_ENDIAN
654    return;
655 #endif
656 
657    /* Gather information about what to clear. */
658    unsigned color_buffer_mask = (*buffers & PIPE_CLEAR_COLOR) >> util_logbase2(PIPE_CLEAR_COLOR0);
659    while (color_buffer_mask) {
660       unsigned i = u_bit_scan(&color_buffer_mask);
661 
662       struct si_texture *tex = (struct si_texture *)fb->cbufs[i]->texture;
663       unsigned level = fb->cbufs[i]->u.tex.level;
664       unsigned num_layers = util_num_layers(&tex->buffer.b.b, level);
665 
666       /* the clear is allowed if all layers are bound */
667       if (fb->cbufs[i]->u.tex.first_layer != 0 ||
668           fb->cbufs[i]->u.tex.last_layer != num_layers - 1) {
669          continue;
670       }
671 
672       /* We can change the micro tile mode before a full clear. */
673       /* This is only used for MSAA textures when clearing all layers. */
674       si_set_optimal_micro_tile_mode(sctx->screen, tex);
675 
676       if (tex->swap_rgb_to_bgr_on_next_clear) {
677          assert(!tex->swap_rgb_to_bgr);
678          assert(tex->buffer.b.b.nr_samples >= 2);
679          tex->swap_rgb_to_bgr = true;
680          tex->swap_rgb_to_bgr_on_next_clear = false;
681 
682          /* Update all sampler views and images. */
683          p_atomic_inc(&sctx->screen->dirty_tex_counter);
684       }
685 
686       /* only supported on tiled surfaces */
687       if (tex->surface.is_linear) {
688          continue;
689       }
690 
691       /* Use a slow clear for small surfaces where the cost of
692        * the eliminate pass can be higher than the benefit of fast
693        * clear. The closed driver does this, but the numbers may differ.
694        *
695        * This helps on both dGPUs and APUs, even small APUs like Mullins.
696        */
697       bool fb_too_small = (uint64_t)num_pixels * num_layers <= 512 * 512;
698       bool too_small = tex->buffer.b.b.nr_samples <= 1 && fb_too_small;
699       bool eliminate_needed = false;
700       bool fmask_decompress_needed = false;
701       bool need_dirtying_fb = false;
702 
703       /* Try to clear DCC first, otherwise try CMASK. */
704       if (vi_dcc_enabled(tex, level)) {
705          uint32_t reset_value;
706 
707          if (sctx->screen->debug_flags & DBG(NO_DCC_CLEAR))
708             continue;
709 
710          if (sctx->gfx_level >= GFX11) {
711             if (!gfx11_get_dcc_clear_parameters(sctx->screen, tex, level, fb->cbufs[i]->format,
712                                                 color, &reset_value, true))
713                continue;
714          } else {
715             if (!gfx8_get_dcc_clear_parameters(sctx->screen, tex->buffer.b.b.format,
716                                                fb->cbufs[i]->format, color, &reset_value,
717                                                &eliminate_needed))
718                continue;
719          }
720 
721          /* Shared textures can't use fast clear without an explicit flush
722           * because the clear color is not exported.
723           *
724           * Chips without DCC constant encoding must set the clear color registers
725           * correctly even if the fast clear eliminate pass is not needed.
726           */
727          if ((eliminate_needed || !sctx->screen->info.has_dcc_constant_encode) &&
728              tex->buffer.b.is_shared &&
729              !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
730             continue;
731 
732          if (eliminate_needed && too_small)
733             continue;
734 
735          /* We can clear any level, but we only set up the clear value registers for the first
736           * level. Therefore, all other levels can be cleared only if the clear value registers
737           * are not used, which is only the case with DCC constant encoding and 0/1 clear values.
738           */
739          if (level > 0 && (eliminate_needed || !sctx->screen->info.has_dcc_constant_encode))
740             continue;
741 
742          if (tex->buffer.b.b.nr_samples >= 2 && eliminate_needed &&
743              !sctx->screen->allow_dcc_msaa_clear_to_reg_for_bpp[util_logbase2(tex->surface.bpe)])
744             continue;
745 
746          assert(num_clears < ARRAY_SIZE(info));
747 
748          if (!vi_dcc_get_clear_info(sctx, tex, level, reset_value, &info[num_clears]))
749             continue;
750 
751          num_clears++;
752          clear_types |= SI_CLEAR_TYPE_DCC;
753 
754          si_mark_display_dcc_dirty(sctx, tex);
755 
756          if (sctx->gfx_level >= GFX11 && reset_value == GFX11_DCC_CLEAR_SINGLE) {
757             /* Put this clear first by moving other clears after it because this clear has
758              * the most GPU overhead.
759              */
760             if (num_clears)
761                memmove(&info[1], &info[0], sizeof(info[0]) * num_clears);
762 
763             si_init_clear_image_dcc_single(&info[0], tex, level, fb->cbufs[i]->format,
764                                            color);
765             num_clears++;
766          }
767 
768          /* DCC fast clear with MSAA should clear CMASK to 0xC. */
769          if (tex->buffer.b.b.nr_samples >= 2 && tex->cmask_buffer) {
770             assert(sctx->gfx_level < GFX11); /* no FMASK/CMASK on GFX11 */
771             assert(num_clears < ARRAY_SIZE(info));
772             si_init_buffer_clear(&info[num_clears++], &tex->cmask_buffer->b.b,
773                                  tex->surface.cmask_offset, tex->surface.cmask_size, 0xCCCCCCCC);
774             clear_types |= SI_CLEAR_TYPE_CMASK;
775             fmask_decompress_needed = true;
776          }
777       } else {
778          /* No CMASK on GFX11. */
779          if (sctx->gfx_level >= GFX11)
780             continue;
781 
782          if (level > 0)
783             continue;
784 
785          /* Shared textures can't use fast clear without an explicit flush
786           * because the clear color is not exported.
787           */
788          if (tex->buffer.b.is_shared &&
789              !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
790             continue;
791 
792          if (too_small)
793             continue;
794 
795          /* 128-bit formats are unsupported */
796          if (tex->surface.bpe > 8) {
797             continue;
798          }
799 
800          /* RB+ doesn't work with CMASK fast clear on Stoney. */
801          if (sctx->family == CHIP_STONEY)
802             continue;
803 
804          /* Disable fast clear if tex is encrypted */
805          if (tex->buffer.flags & RADEON_FLAG_ENCRYPTED)
806             continue;
807 
808          uint64_t cmask_offset = 0;
809          unsigned clear_size = 0;
810          bool had_cmask_buffer = tex->cmask_buffer != NULL;
811 
812          if (sctx->gfx_level >= GFX10) {
813             assert(level == 0);
814 
815             /* Clearing CMASK with both multiple levels and multiple layers is not
816              * implemented.
817              */
818             if (num_layers > 1 && tex->buffer.b.b.last_level > 0)
819                continue;
820 
821             if (!si_alloc_separate_cmask(sctx->screen, tex))
822                continue;
823 
824             if (num_layers == 1) {
825                /* Clear level 0. */
826                cmask_offset = tex->surface.cmask_offset + tex->surface.u.gfx9.color.cmask_level0.offset;
827                clear_size = tex->surface.u.gfx9.color.cmask_level0.size;
828             } else if (tex->buffer.b.b.last_level == 0) {
829                /* Clear all layers having only 1 level. */
830                cmask_offset = tex->surface.cmask_offset;
831                clear_size = tex->surface.cmask_size;
832             } else {
833                assert(0); /* this is prevented above */
834             }
835          } else if (sctx->gfx_level == GFX9) {
836             /* TODO: Implement CMASK fast clear for level 0 of mipmapped textures. Mipmapped
837              * CMASK has to clear a rectangular area of CMASK for level 0 (because the whole
838              * miptree is organized in a 2D plane).
839              */
840             if (tex->buffer.b.b.last_level > 0)
841                continue;
842 
843             if (!si_alloc_separate_cmask(sctx->screen, tex))
844                continue;
845 
846             cmask_offset = tex->surface.cmask_offset;
847             clear_size = tex->surface.cmask_size;
848          } else {
849             if (!si_alloc_separate_cmask(sctx->screen, tex))
850                continue;
851 
852             /* GFX6-8: This only covers mipmap level 0. */
853             cmask_offset = tex->surface.cmask_offset;
854             clear_size = tex->surface.cmask_size;
855          }
856 
857          /* Do the fast clear. */
858          assert(num_clears < ARRAY_SIZE(info));
859          si_init_buffer_clear(&info[num_clears++], &tex->cmask_buffer->b.b,
860                               cmask_offset, clear_size, 0);
861          clear_types |= SI_CLEAR_TYPE_CMASK;
862          eliminate_needed = true;
863          /* If we allocated a cmask buffer for this tex we need to re-emit
864           * the fb state.
865           */
866          need_dirtying_fb = !had_cmask_buffer;
867       }
868 
869       if ((eliminate_needed || fmask_decompress_needed) &&
870           !(tex->dirty_level_mask & (1 << level))) {
871          assert(sctx->gfx_level < GFX11); /* no decompression needed on GFX11 */
872          tex->dirty_level_mask |= 1 << level;
873          p_atomic_inc(&sctx->screen->compressed_colortex_counter);
874       }
875 
876       *buffers &= ~(PIPE_CLEAR_COLOR0 << i);
877 
878       /* Chips with DCC constant encoding don't need to set the clear
879        * color registers for DCC clear values 0 and 1.
880        */
881       if (sctx->screen->info.has_dcc_constant_encode && !eliminate_needed)
882          continue;
883 
884       /* There are no clear color registers on GFX11. */
885       assert(sctx->gfx_level < GFX11);
886 
887       if (si_set_clear_color(tex, fb->cbufs[i]->format, color) || need_dirtying_fb) {
888          sctx->framebuffer.dirty_cbufs |= 1 << i;
889          si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
890       }
891    }
892 
893    /* Depth/stencil clears. */
894    struct pipe_surface *zsbuf = fb->zsbuf;
895    struct si_texture *zstex = zsbuf ? (struct si_texture *)zsbuf->texture : NULL;
896    unsigned zs_num_layers = zstex ? util_num_layers(&zstex->buffer.b.b, zsbuf->u.tex.level) : 0;
897 
898    if (zstex && zsbuf->u.tex.first_layer == 0 &&
899        zsbuf->u.tex.last_layer == zs_num_layers - 1 &&
900        si_htile_enabled(zstex, zsbuf->u.tex.level, PIPE_MASK_ZS)) {
901       unsigned level = zsbuf->u.tex.level;
902       bool update_db_depth_clear = false;
903       bool update_db_stencil_clear = false;
904       bool fb_too_small = num_pixels * zs_num_layers <= 512 * 512;
905 
906       /* Transition from TC-incompatible to TC-compatible HTILE if requested. */
907       if (zstex->enable_tc_compatible_htile_next_clear) {
908          assert(zstex->buffer.b.b.last_level == 0);
909          assert(!zstex->tc_compatible_htile);
910 
911          /* Decompress both depth and stencil. TC-compatible HTILE uses slightly different
912           * compression, so we must decompress before we change it.
913           *
914           * The clear isn't just memset. It still reads HTILE and decides what to do based on that.
915           * We need to decompress fully, so that HTILE doesn't contain any compression flags.
916           */
917          si_decompress_subresource(&sctx->b, zsbuf->texture, PIPE_MASK_ZS, 0, 0,
918                                    util_max_layer(zsbuf->texture, 0), false);
919 
920          /* Enable TC-compatible HTILE. */
921          zstex->enable_tc_compatible_htile_next_clear = false;
922          zstex->tc_compatible_htile = true;
923 
924          /* Update the framebuffer state to reflect the change. */
925          sctx->framebuffer.DB_has_shader_readable_metadata = true;
926          sctx->framebuffer.dirty_zsbuf = true;
927          si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
928 
929          /* Update all sampler views and shader images in all contexts. */
930          p_atomic_inc(&sctx->screen->dirty_tex_counter);
931       }
932 
933       if (num_clears || !fb_too_small) {
934          /* This is where the HTILE buffer clear is done.
935           *
936           * If there is no clear scheduled and the framebuffer size is too small, we should use
937           * the draw-based clear that is without waits. If there is some other clear scheduled,
938           * we will have to wait anyway, so add the HTILE buffer clear to the batch here.
939           * If the framebuffer size is large enough, use this codepath too.
940           */
941          uint64_t htile_offset = zstex->surface.meta_offset;
942          unsigned htile_size = 0;
943 
944          /* Determine the HTILE subset to clear. */
945          if (sctx->gfx_level >= GFX10) {
946             /* This can only clear a layered texture with 1 level or a mipmap texture
947              * with 1 layer. Other cases are unimplemented.
948              */
949             if (zs_num_layers == 1) {
950                /* Clear a specific level. */
951                htile_offset += zstex->surface.u.gfx9.meta_levels[level].offset;
952                htile_size = zstex->surface.u.gfx9.meta_levels[level].size;
953             } else if (zstex->buffer.b.b.last_level == 0) {
954                /* Clear all layers having only 1 level. */
955                htile_size = zstex->surface.meta_size;
956             }
957          } else {
958             /* This can only clear a layered texture with 1 level. Other cases are
959              * unimplemented.
960              */
961             if (zstex->buffer.b.b.last_level == 0)
962                htile_size = zstex->surface.meta_size;
963          }
964 
965          /* Perform the clear if it's possible. */
966          if (zstex->htile_stencil_disabled || !zstex->surface.has_stencil) {
967             if (htile_size &&
968                 si_can_fast_clear_depth(zstex, level, depth, *buffers)) {
969                /* Z-only clear. */
970                assert(num_clears < ARRAY_SIZE(info));
971                si_init_buffer_clear(&info[num_clears++], &zstex->buffer.b.b, htile_offset,
972                                     htile_size, si_get_htile_clear_value(zstex, depth));
973                clear_types |= SI_CLEAR_TYPE_HTILE;
974                *buffers &= ~PIPE_CLEAR_DEPTH;
975                zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level);
976                zstex->depth_cleared_level_mask |= BITFIELD_BIT(level);
977                update_db_depth_clear = true;
978             }
979          } else if ((*buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) {
980             if (htile_size &&
981                 si_can_fast_clear_depth(zstex, level, depth, *buffers) &&
982                 si_can_fast_clear_stencil(zstex, level, stencil, *buffers)) {
983                /* Combined Z+S clear. */
984                assert(num_clears < ARRAY_SIZE(info));
985                si_init_buffer_clear(&info[num_clears++], &zstex->buffer.b.b, htile_offset,
986                                     htile_size, si_get_htile_clear_value(zstex, depth));
987                clear_types |= SI_CLEAR_TYPE_HTILE;
988                *buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
989                zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level);
990                zstex->depth_cleared_level_mask |= BITFIELD_BIT(level);
991                zstex->stencil_cleared_level_mask_once |= BITFIELD_BIT(level);
992                update_db_depth_clear = true;
993                update_db_stencil_clear = true;
994             }
995          } else {
996             /* Z-only or S-only clear when both Z/S are present using a read-modify-write
997              * compute shader.
998              *
999              * If we get both clears but only one of them can be fast-cleared, we use
1000              * the draw-based fast clear to do both at the same time.
1001              */
1002             const uint32_t htile_depth_writemask = 0xfffffc0f;
1003             const uint32_t htile_stencil_writemask = 0x000003f0;
1004 
1005             if (htile_size &&
1006                 !(*buffers & PIPE_CLEAR_STENCIL) &&
1007                 si_can_fast_clear_depth(zstex, level, depth, *buffers)) {
1008                /* Z-only clear with stencil left intact. */
1009                assert(num_clears < ARRAY_SIZE(info));
1010                si_init_buffer_clear_rmw(&info[num_clears++], &zstex->buffer.b.b, htile_offset,
1011                                         htile_size, si_get_htile_clear_value(zstex, depth),
1012                                         htile_depth_writemask);
1013                clear_types |= SI_CLEAR_TYPE_HTILE;
1014                *buffers &= ~PIPE_CLEAR_DEPTH;
1015                zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level);
1016                zstex->depth_cleared_level_mask |= BITFIELD_BIT(level);
1017                update_db_depth_clear = true;
1018             } else if (htile_size &&
1019                        !(*buffers & PIPE_CLEAR_DEPTH) &&
1020                        si_can_fast_clear_stencil(zstex, level, stencil, *buffers)) {
1021                /* Stencil-only clear with depth left intact. */
1022                assert(num_clears < ARRAY_SIZE(info));
1023                si_init_buffer_clear_rmw(&info[num_clears++], &zstex->buffer.b.b, htile_offset,
1024                                         htile_size, si_get_htile_clear_value(zstex, depth),
1025                                         htile_stencil_writemask);
1026                clear_types |= SI_CLEAR_TYPE_HTILE;
1027                *buffers &= ~PIPE_CLEAR_STENCIL;
1028                zstex->stencil_cleared_level_mask_once |= BITFIELD_BIT(level);
1029                update_db_stencil_clear = true;
1030             }
1031          }
1032 
1033          zstex->need_flush_after_depth_decompression = update_db_depth_clear && sctx->gfx_level == GFX10_3;
1034       }
1035 
1036       /* Update DB_DEPTH_CLEAR. */
1037       if (update_db_depth_clear &&
1038           zstex->depth_clear_value[level] != (float)depth) {
1039          zstex->depth_clear_value[level] = depth;
1040          sctx->framebuffer.dirty_zsbuf = true;
1041          si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
1042       }
1043 
1044       /* Update DB_STENCIL_CLEAR. */
1045       if (update_db_stencil_clear &&
1046           zstex->stencil_clear_value[level] != stencil) {
1047          zstex->stencil_clear_value[level] = stencil;
1048          sctx->framebuffer.dirty_zsbuf = true;
1049          si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
1050       }
1051    }
1052 
1053    if (num_clears) {
1054       si_barrier_before_image_fast_clear(sctx, clear_types);
1055       si_execute_clears(sctx, info, num_clears, sctx->render_cond_enabled);
1056       si_barrier_after_image_fast_clear(sctx);
1057    }
1058 }
1059 
si_fb_clear_via_compute(struct si_context * sctx,unsigned * buffers,const union pipe_color_union * color)1060 static void si_fb_clear_via_compute(struct si_context *sctx, unsigned *buffers,
1061                                     const union pipe_color_union *color)
1062 {
1063    struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
1064 
1065    unsigned color_buffer_mask = (*buffers & PIPE_CLEAR_COLOR) >> util_logbase2(PIPE_CLEAR_COLOR0);
1066    while (color_buffer_mask) {
1067       unsigned i = u_bit_scan(&color_buffer_mask);
1068 
1069       struct pipe_surface *surf = fb->cbufs[i];
1070       unsigned depth = surf->u.tex.last_layer - surf->u.tex.first_layer + 1;
1071       struct si_texture *tex = (struct si_texture *)surf->texture;
1072 
1073       /* If DCC is enable (which can happen with thick tiling on gfx8, don't use compute to get
1074        * compressed clears.
1075        */
1076       if (vi_dcc_enabled(tex, surf->u.tex.level))
1077          continue;
1078 
1079       /* Clears of thick and linear layouts are fastest with compute. */
1080       if (tex->surface.thick_tiling ||
1081           (tex->surface.is_linear && (surf->height > 1 || depth > 1 || surf->width >= 8192))) {
1082          struct pipe_box box;
1083 
1084          u_box_3d(0, 0, surf->u.tex.first_layer, surf->width, surf->height, depth, &box);
1085 
1086          if (si_compute_clear_image(sctx, &tex->buffer.b.b, surf->format, surf->u.tex.level, &box,
1087                                     color, sctx->render_cond_enabled, true))
1088             *buffers &= ~(PIPE_CLEAR_COLOR0 << i); /* success */
1089       }
1090    }
1091 }
1092 
gfx6_clear(struct pipe_context * ctx,unsigned buffers,const struct pipe_scissor_state * scissor_state,const union pipe_color_union * color,double depth,unsigned stencil)1093 static void gfx6_clear(struct pipe_context *ctx, unsigned buffers,
1094                        const struct pipe_scissor_state *scissor_state,
1095                        const union pipe_color_union *color, double depth, unsigned stencil)
1096 {
1097    struct si_context *sctx = (struct si_context *)ctx;
1098    struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
1099    struct pipe_surface *zsbuf = fb->zsbuf;
1100    struct si_texture *zstex = zsbuf ? (struct si_texture *)zsbuf->texture : NULL;
1101 
1102    /* Unset clear flags for non-existent buffers. */
1103    for (unsigned i = 0; i < 8; i++) {
1104       if (i >= fb->nr_cbufs || !fb->cbufs[i])
1105          buffers &= ~(PIPE_CLEAR_COLOR0 << i);
1106    }
1107    if (!zsbuf)
1108       buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
1109    else if (!util_format_has_stencil(util_format_description(zsbuf->format)))
1110       buffers &= ~PIPE_CLEAR_STENCIL;
1111 
1112    si_fast_clear(sctx, &buffers, color, depth, stencil);
1113    if (!buffers)
1114       return; /* all buffers have been cleared */
1115 
1116    si_fb_clear_via_compute(sctx, &buffers, color);
1117    if (!buffers)
1118       return; /* all buffers have been cleared */
1119 
1120    if (buffers & PIPE_CLEAR_COLOR) {
1121       /* These buffers cannot use fast clear, make sure to disable expansion. */
1122       unsigned color_buffer_mask = (buffers & PIPE_CLEAR_COLOR) >> util_logbase2(PIPE_CLEAR_COLOR0);
1123       while (color_buffer_mask) {
1124          unsigned i = u_bit_scan(&color_buffer_mask);
1125          struct si_texture *tex = (struct si_texture *)fb->cbufs[i]->texture;
1126          if (tex->surface.fmask_size == 0)
1127             tex->dirty_level_mask &= ~(1 << fb->cbufs[i]->u.tex.level);
1128       }
1129    }
1130 
1131    if (zstex && zsbuf->u.tex.first_layer == 0 &&
1132        zsbuf->u.tex.last_layer == util_max_layer(&zstex->buffer.b.b, 0)) {
1133       unsigned level = zsbuf->u.tex.level;
1134 
1135       if (si_can_fast_clear_depth(zstex, level, depth, buffers)) {
1136          /* Need to disable EXPCLEAR temporarily if clearing
1137           * to a new value. */
1138          if (!(zstex->depth_cleared_level_mask_once & BITFIELD_BIT(level)) ||
1139              zstex->depth_clear_value[level] != depth) {
1140             sctx->db_depth_disable_expclear = true;
1141          }
1142 
1143          if (zstex->depth_clear_value[level] != (float)depth) {
1144             if ((zstex->depth_clear_value[level] != 0) != (depth != 0)) {
1145                /* ZRANGE_PRECISION register of a bound surface will change so we
1146                 * must flush the DB caches. */
1147                sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_DB;
1148                si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
1149             }
1150             /* Update DB_DEPTH_CLEAR. */
1151             zstex->depth_clear_value[level] = depth;
1152             sctx->framebuffer.dirty_zsbuf = true;
1153             si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
1154          }
1155          sctx->db_depth_clear = true;
1156          si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
1157       }
1158 
1159       if (si_can_fast_clear_stencil(zstex, level, stencil, buffers)) {
1160          stencil &= 0xff;
1161 
1162          /* Need to disable EXPCLEAR temporarily if clearing
1163           * to a new value. */
1164          if (!(zstex->stencil_cleared_level_mask_once & BITFIELD_BIT(level)) ||
1165              zstex->stencil_clear_value[level] != stencil) {
1166             sctx->db_stencil_disable_expclear = true;
1167          }
1168 
1169          if (zstex->stencil_clear_value[level] != (uint8_t)stencil) {
1170             /* Update DB_STENCIL_CLEAR. */
1171             zstex->stencil_clear_value[level] = stencil;
1172             sctx->framebuffer.dirty_zsbuf = true;
1173             si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
1174          }
1175          sctx->db_stencil_clear = true;
1176          si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
1177       }
1178 
1179       /* TODO: This hack fixes dEQP-GLES[23].functional.fragment_ops.random.* on Navi31.
1180        * The root cause is unknown.
1181        */
1182       if (sctx->gfx_level == GFX11 || sctx->gfx_level == GFX11_5) {
1183          sctx->barrier_flags |= SI_BARRIER_SYNC_VS;
1184          si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
1185       }
1186    }
1187 
1188    if (unlikely(sctx->sqtt_enabled)) {
1189       if (buffers & PIPE_CLEAR_COLOR)
1190          sctx->sqtt_next_event = EventCmdClearColorImage;
1191       else if (buffers & PIPE_CLEAR_DEPTHSTENCIL)
1192          sctx->sqtt_next_event = EventCmdClearDepthStencilImage;
1193    }
1194 
1195    si_blitter_begin(sctx, SI_CLEAR);
1196    util_blitter_clear(sctx->blitter, fb->width, fb->height, util_framebuffer_get_num_layers(fb),
1197                       buffers, color, depth, stencil, sctx->framebuffer.nr_samples > 1);
1198    si_blitter_end(sctx);
1199 
1200    if (sctx->db_depth_clear) {
1201       sctx->db_depth_clear = false;
1202       sctx->db_depth_disable_expclear = false;
1203       zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(zsbuf->u.tex.level);
1204       zstex->depth_cleared_level_mask |= BITFIELD_BIT(zsbuf->u.tex.level);
1205       si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
1206    }
1207 
1208    if (sctx->db_stencil_clear) {
1209       sctx->db_stencil_clear = false;
1210       sctx->db_stencil_disable_expclear = false;
1211       zstex->stencil_cleared_level_mask_once |= BITFIELD_BIT(zsbuf->u.tex.level);
1212       si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
1213    }
1214 }
1215 
gfx12_clear(struct pipe_context * ctx,unsigned buffers,const struct pipe_scissor_state * scissor_state,const union pipe_color_union * color,double depth,unsigned stencil)1216 static void gfx12_clear(struct pipe_context *ctx, unsigned buffers,
1217                         const struct pipe_scissor_state *scissor_state,
1218                         const union pipe_color_union *color, double depth, unsigned stencil)
1219 {
1220    struct si_context *sctx = (struct si_context *)ctx;
1221    struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
1222    struct pipe_surface *zsbuf = fb->zsbuf;
1223    struct si_texture *zstex = zsbuf ? (struct si_texture *)zsbuf->texture : NULL;
1224 
1225    /* Unset clear flags for non-existent buffers. */
1226    for (unsigned i = 0; i < 8; i++) {
1227       if (i >= fb->nr_cbufs || !fb->cbufs[i])
1228          buffers &= ~(PIPE_CLEAR_COLOR0 << i);
1229    }
1230    if (!zsbuf)
1231       buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
1232    else if (!util_format_has_stencil(util_format_description(zsbuf->format)))
1233       buffers &= ~PIPE_CLEAR_STENCIL;
1234 
1235    if (unlikely(sctx->sqtt_enabled)) {
1236       if (buffers & PIPE_CLEAR_COLOR)
1237          sctx->sqtt_next_event = EventCmdClearColorImage;
1238       else if (buffers & PIPE_CLEAR_DEPTHSTENCIL)
1239          sctx->sqtt_next_event = EventCmdClearDepthStencilImage;
1240    }
1241 
1242    si_blitter_begin(sctx, SI_CLEAR);
1243    util_blitter_clear(sctx->blitter, fb->width, fb->height, util_framebuffer_get_num_layers(fb),
1244                       buffers, color, depth, stencil, sctx->framebuffer.nr_samples > 1);
1245    si_blitter_end(sctx);
1246 
1247    /* This is only used by the driver, not the hw. */
1248    if (buffers & PIPE_CLEAR_DEPTH) {
1249       zstex->depth_cleared_level_mask |= BITFIELD_BIT(zsbuf->u.tex.level);
1250       zstex->depth_clear_value[zsbuf->u.tex.level] = depth;
1251    }
1252 }
1253 
si_try_normal_clear(struct si_context * sctx,struct pipe_surface * dst,unsigned dstx,unsigned dsty,unsigned width,unsigned height,bool render_condition_enabled,unsigned buffers,const union pipe_color_union * color,float depth,unsigned stencil)1254 static bool si_try_normal_clear(struct si_context *sctx, struct pipe_surface *dst,
1255                                 unsigned dstx, unsigned dsty, unsigned width, unsigned height,
1256                                 bool render_condition_enabled, unsigned buffers,
1257                                 const union pipe_color_union *color,
1258                                 float depth, unsigned stencil)
1259 {
1260    /* This is worth it only if it's a whole image clear. */
1261    if (dstx == 0 && dsty == 0 &&
1262        width == dst->width &&
1263        height == dst->height &&
1264        dst->u.tex.first_layer == 0 &&
1265        dst->u.tex.last_layer == util_max_layer(dst->texture, dst->u.tex.level) &&
1266        /* pipe->clear honors render_condition, so only use it if it's unset or if it's set and enabled. */
1267        (!sctx->render_cond || render_condition_enabled) &&
1268        sctx->has_graphics) {
1269       struct pipe_context *ctx = &sctx->b;
1270       struct pipe_framebuffer_state saved_fb = {}, fb = {};
1271 
1272       util_copy_framebuffer_state(&saved_fb, &sctx->framebuffer.state);
1273 
1274       if (buffers & PIPE_CLEAR_COLOR) {
1275          fb.cbufs[0] = dst;
1276          fb.nr_cbufs = 1;
1277       } else {
1278          fb.zsbuf = dst;
1279       }
1280 
1281       fb.width = dst->width;
1282       fb.height = dst->height;
1283 
1284       ctx->set_framebuffer_state(ctx, &fb);
1285       ctx->clear(ctx, buffers, NULL, color, depth, stencil);
1286       ctx->set_framebuffer_state(ctx, &saved_fb);
1287 
1288       util_copy_framebuffer_state(&saved_fb, NULL);
1289 
1290       return true;
1291    }
1292 
1293    return false;
1294 }
1295 
si_compute_fast_clear_image(struct si_context * sctx,struct pipe_resource * dst,enum pipe_format format,unsigned level,const struct pipe_box * box,const union pipe_color_union * color,bool render_condition_enable,bool fail_if_slow)1296 bool si_compute_fast_clear_image(struct si_context *sctx, struct pipe_resource *dst,
1297                                  enum pipe_format format, unsigned level, const struct pipe_box *box,
1298                                  const union pipe_color_union *color, bool render_condition_enable,
1299                                  bool fail_if_slow)
1300 {
1301    struct si_texture *sdst = (struct si_texture*)dst;
1302 
1303    if (!vi_dcc_enabled(sdst, level))
1304       return false;
1305 
1306    /* Only the whole image can be cleared. */
1307    if (box->x != 0 || box->y != 0 || box->width != u_minify(dst->width0, level) ||
1308        box->height != u_minify(dst->height0, level) || box->depth != util_num_layers(dst, level))
1309       return false;
1310 
1311    uint32_t dcc_value;
1312    bool eliminate_needed;
1313 
1314    /* Get the DCC clear value. */
1315    if (sctx->gfx_level >= GFX11) {
1316       if (!gfx11_get_dcc_clear_parameters(sctx->screen, sdst, level, format,
1317                                           color, &dcc_value, fail_if_slow))
1318          return false;
1319    } else {
1320       if (!gfx8_get_dcc_clear_parameters(sctx->screen, dst->format, format, color, &dcc_value,
1321                                          &eliminate_needed) ||
1322           eliminate_needed)
1323          return false;
1324    }
1325 
1326    /* Get DCC clear info. */
1327    struct si_clear_info info[3]; /* DCC + CMASK + clear_image_dcc_single */
1328    unsigned num_clears = 0, clear_types = 0;
1329 
1330    if (!vi_dcc_get_clear_info(sctx, sdst, level, dcc_value, &info[num_clears]))
1331       return false;
1332 
1333    num_clears++;
1334    clear_types |= SI_CLEAR_TYPE_DCC;
1335    si_mark_display_dcc_dirty(sctx, sdst);
1336 
1337    if (sctx->gfx_level >= GFX11 && dcc_value == GFX11_DCC_CLEAR_SINGLE) {
1338       /* Put this clear first by moving other clears after it because this clear has
1339        * the most GPU overhead.
1340        */
1341       memmove(&info[1], &info[0], sizeof(info[0]) * num_clears);
1342       si_init_clear_image_dcc_single(&info[0], sdst, level, format, color);
1343       num_clears++;
1344    }
1345 
1346    /* DCC fast clear with MSAA should clear CMASK to 0xC. */
1347    if (dst->nr_samples >= 2 && sdst->cmask_buffer) {
1348       assert(sctx->gfx_level < GFX11); /* no FMASK/CMASK on GFX11 */
1349       assert(num_clears < ARRAY_SIZE(info));
1350       si_init_buffer_clear(&info[num_clears++], &sdst->cmask_buffer->b.b,
1351                            sdst->surface.cmask_offset, sdst->surface.cmask_size, 0xCCCCCCCC);
1352       clear_types |= SI_CLEAR_TYPE_CMASK;
1353 
1354       if (!(sdst->dirty_level_mask & BITFIELD_BIT(level))) {
1355          sdst->dirty_level_mask |= BITFIELD_BIT(level);
1356          p_atomic_inc(&sctx->screen->compressed_colortex_counter);
1357       }
1358    }
1359 
1360    assert(num_clears <= ARRAY_SIZE(info));
1361    si_barrier_before_image_fast_clear(sctx, clear_types);
1362    si_execute_clears(sctx, info, num_clears, render_condition_enable);
1363    si_barrier_after_image_fast_clear(sctx);
1364    return true;
1365 }
1366 
/**
 * pipe_context::clear_render_target hook.
 *
 * Clears the rectangle (dstx, dsty, width, height) of every layer covered by
 * the surface "dst" to "color". The clear paths are attempted in this order,
 * stopping at the first one that reports success:
 *   1. si_try_normal_clear (only on GFX10.3 and older, see below),
 *   2. si_compute_fast_clear_image,
 *   3. si_compute_clear_image,
 *   4. si_gfx_clear_render_target (blitter fallback).
 */
static void si_clear_render_target(struct pipe_context *ctx, struct pipe_surface *dst,
                                   const union pipe_color_union *color, unsigned dstx,
                                   unsigned dsty, unsigned width, unsigned height,
                                   bool render_condition_enabled)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_texture *tex = (struct si_texture *)dst->texture;
   const unsigned level = dst->u.tex.level;

   /* For older chips that can do fast clear with any clear color (using GFX8_DCC_CLEAR_REG
    * or CMASK).
    */
   bool try_normal_clear = false;

   if (sctx->gfx_level <= GFX10_3) {
      /* GFX6-9 allow CMASK without MSAA and allocate it on demand, but only 8-64bpp. */
      try_normal_clear = vi_dcc_enabled(tex, level) ||
                         (sctx->gfx_level <= GFX9 && tex->surface.bpe <= 8);
   }

   if (try_normal_clear &&
       si_try_normal_clear(sctx, dst, dstx, dsty, width, height, render_condition_enabled,
                           PIPE_CLEAR_COLOR0, color, 0, 0))
      return;

   /* The region to clear spans all layers referenced by the surface. */
   const unsigned num_layers = dst->u.tex.last_layer - dst->u.tex.first_layer + 1;
   struct pipe_box box;

   u_box_3d(dstx, dsty, dst->u.tex.first_layer, width, height, num_layers, &box);

   /* Compute-based clears; the second is only attempted if the first one fails. */
   if (si_compute_fast_clear_image(sctx, dst->texture, dst->format, level, &box, color,
                                   render_condition_enabled, true) ||
       si_compute_clear_image(sctx, dst->texture, dst->format, level, &box, color,
                              render_condition_enabled, true))
      return;

   /* Last resort: clear using the gfx blitter. */
   si_gfx_clear_render_target(ctx, dst, color, dstx, dsty, width, height,
                              render_condition_enabled);
}
1401 
/**
 * Clear a rectangle of a color surface through the util_blitter helper.
 *
 * The blitter call is bracketed by si_blitter_begin/si_blitter_end using
 * SI_CLEAR_SURFACE; SI_DISABLE_RENDER_COND is added when
 * render_condition_enabled is false.
 */
void si_gfx_clear_render_target(struct pipe_context *ctx, struct pipe_surface *dst,
                                const union pipe_color_union *color, unsigned dstx,
                                unsigned dsty, unsigned width, unsigned height,
                                bool render_condition_enabled)
{
   struct si_context *sctx = (struct si_context *)ctx;
   unsigned blitter_flags = SI_CLEAR_SURFACE;

   if (!render_condition_enabled)
      blitter_flags |= SI_DISABLE_RENDER_COND;

   si_blitter_begin(sctx, blitter_flags);
   util_blitter_clear_render_target(sctx->blitter, dst, color, dstx, dsty, width, height);
   si_blitter_end(sctx);
}
1414 
/**
 * pipe_context::clear_depth_stencil hook.
 *
 * Clears the rectangle (dstx, dsty, width, height) of the depth/stencil
 * surface "dst" according to clear_flags. si_try_normal_clear is attempted
 * first; if it reports failure, the clear is done through util_blitter.
 */
static void si_clear_depth_stencil(struct pipe_context *ctx, struct pipe_surface *dst,
                                   unsigned clear_flags, double depth, unsigned stencil,
                                   unsigned dstx, unsigned dsty, unsigned width, unsigned height,
                                   bool render_condition_enabled)
{
   struct si_context *sctx = (struct si_context *)ctx;
   /* si_try_normal_clear takes a color argument; pass a zero-initialized placeholder. */
   union pipe_color_union no_color = {};

   /* Fast path that just clears HTILE. */
   if (si_try_normal_clear(sctx, dst, dstx, dsty, width, height, render_condition_enabled,
                           clear_flags, &no_color, depth, stencil))
      return;

   /* Blitter fallback. */
   unsigned blitter_flags = SI_CLEAR_SURFACE;

   if (!render_condition_enabled)
      blitter_flags |= SI_DISABLE_RENDER_COND;

   si_blitter_begin(sctx, blitter_flags);
   util_blitter_clear_depth_stencil(sctx->blitter, dst, clear_flags, depth, stencil, dstx, dsty,
                                    width, height);
   si_blitter_end(sctx);
}
1434 
si_init_clear_functions(struct si_context * sctx)1435 void si_init_clear_functions(struct si_context *sctx)
1436 {
1437    sctx->b.clear_render_target = si_clear_render_target;
1438    sctx->b.clear_texture = u_default_clear_texture;
1439 
1440    if (sctx->has_graphics) {
1441       if (sctx->gfx_level >= GFX12)
1442          sctx->b.clear = gfx12_clear;
1443       else
1444          sctx->b.clear = gfx6_clear;
1445 
1446       sctx->b.clear_depth_stencil = si_clear_depth_stencil;
1447    }
1448 }
1449