• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2017 Advanced Micro Devices, Inc.
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "si_pipe.h"
8 #include "sid.h"
9 #include "util/format/u_format.h"
10 #include "util/u_pack_color.h"
11 #include "util/u_surface.h"
12 
13 enum {
14    SI_CLEAR = SI_SAVE_FRAGMENT_STATE | SI_SAVE_FRAGMENT_CONSTANT,
15    SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE,
16 };
17 
si_init_buffer_clear(struct si_clear_info * info,struct pipe_resource * resource,uint64_t offset,uint32_t size,uint32_t clear_value)18 void si_init_buffer_clear(struct si_clear_info *info,
19                           struct pipe_resource *resource, uint64_t offset,
20                           uint32_t size, uint32_t clear_value)
21 {
22    info->resource = resource;
23    info->offset = offset;
24    info->size = size;
25    info->clear_value = clear_value;
26    info->writemask = 0xffffffff;
27    info->is_dcc_msaa = false;
28 }
29 
si_init_buffer_clear_rmw(struct si_clear_info * info,struct pipe_resource * resource,uint64_t offset,uint32_t size,uint32_t clear_value,uint32_t writemask)30 static void si_init_buffer_clear_rmw(struct si_clear_info *info,
31                                      struct pipe_resource *resource, uint64_t offset,
32                                      uint32_t size, uint32_t clear_value, uint32_t writemask)
33 {
34    si_init_buffer_clear(info, resource, offset, size, clear_value);
35    info->writemask = writemask;
36 }
37 
si_execute_clears(struct si_context * sctx,struct si_clear_info * info,unsigned num_clears,unsigned types)38 void si_execute_clears(struct si_context *sctx, struct si_clear_info *info,
39                        unsigned num_clears, unsigned types)
40 {
41    if (!num_clears)
42       return;
43 
44    /* Flush caches and wait for idle. */
45    if (types & (SI_CLEAR_TYPE_CMASK | SI_CLEAR_TYPE_DCC))
46       sctx->flags |= si_get_flush_flags(sctx, SI_COHERENCY_CB_META, L2_LRU);
47 
48    if (types & SI_CLEAR_TYPE_HTILE)
49       sctx->flags |= si_get_flush_flags(sctx, SI_COHERENCY_DB_META, L2_LRU);
50 
51    /* Flush caches in case we use compute. */
52    sctx->flags |= SI_CONTEXT_INV_VCACHE;
53 
54    /* GFX6-8: CB and DB don't use L2. */
55    if (sctx->gfx_level <= GFX8)
56       sctx->flags |= SI_CONTEXT_INV_L2;
57 
58    si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
59 
60    /* Execute clears. */
61    for (unsigned i = 0; i < num_clears; i++) {
62       if (info[i].is_dcc_msaa) {
63          gfx9_clear_dcc_msaa(sctx, info[i].resource, info[i].clear_value,
64                              SI_OP_SKIP_CACHE_INV_BEFORE, SI_COHERENCY_CP);
65          continue;
66       }
67 
68       assert(info[i].size > 0);
69 
70       if (info[i].writemask != 0xffffffff) {
71          si_compute_clear_buffer_rmw(sctx, info[i].resource, info[i].offset, info[i].size,
72                                      info[i].clear_value, info[i].writemask,
73                                      SI_OP_SKIP_CACHE_INV_BEFORE, SI_COHERENCY_CP);
74       } else {
75          /* Compute shaders are much faster on both dGPUs and APUs. Don't use CP DMA. */
76          si_clear_buffer(sctx, info[i].resource, info[i].offset, info[i].size,
77                          &info[i].clear_value, 4, SI_OP_SKIP_CACHE_INV_BEFORE,
78                          SI_COHERENCY_CP, SI_COMPUTE_CLEAR_METHOD);
79       }
80    }
81 
82    /* Wait for idle. */
83    sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
84 
85    /* GFX6-8: CB and DB don't use L2. */
86    if (sctx->gfx_level <= GFX8)
87       sctx->flags |= SI_CONTEXT_WB_L2;
88 
89    si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
90 }
91 
si_alloc_separate_cmask(struct si_screen * sscreen,struct si_texture * tex)92 static bool si_alloc_separate_cmask(struct si_screen *sscreen, struct si_texture *tex)
93 {
94    assert(sscreen->info.gfx_level < GFX11);
95 
96    /* CMASK for MSAA is allocated in advance or always disabled
97     * by "nofmask" option.
98     */
99    if (tex->cmask_buffer)
100       return true;
101 
102    if (!tex->surface.cmask_size)
103       return false;
104 
105    tex->cmask_buffer =
106       si_aligned_buffer_create(&sscreen->b, PIPE_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT,
107                                tex->surface.cmask_size, 1 << tex->surface.cmask_alignment_log2);
108    if (tex->cmask_buffer == NULL)
109       return false;
110 
111    /* These 2 fields are part of the framebuffer state but dirtying the atom
112     * will be done by the caller.
113     */
114    tex->cmask_base_address_reg = tex->cmask_buffer->gpu_address >> 8;
115    tex->cb_color_info |= S_028C70_FAST_CLEAR(1);
116 
117    p_atomic_inc(&sscreen->compressed_colortex_counter);
118    return true;
119 }
120 
si_set_clear_color(struct si_texture * tex,enum pipe_format surface_format,const union pipe_color_union * color)121 static bool si_set_clear_color(struct si_texture *tex, enum pipe_format surface_format,
122                                const union pipe_color_union *color)
123 {
124    union util_color uc;
125 
126    memset(&uc, 0, sizeof(uc));
127 
128    if (tex->surface.bpe == 16) {
129       /* DCC fast clear only:
130        *   CLEAR_WORD0 = R = G = B
131        *   CLEAR_WORD1 = A
132        */
133       assert(color->ui[0] == color->ui[1] && color->ui[0] == color->ui[2]);
134       uc.ui[0] = color->ui[0];
135       uc.ui[1] = color->ui[3];
136    } else {
137       if (tex->swap_rgb_to_bgr)
138          surface_format = util_format_rgb_to_bgr(surface_format);
139 
140       util_pack_color_union(surface_format, &uc, color);
141    }
142 
143    if (memcmp(tex->color_clear_value, &uc, 2 * sizeof(uint32_t)) == 0)
144       return false;
145 
146    memcpy(tex->color_clear_value, &uc, 2 * sizeof(uint32_t));
147    return true;
148 }
149 
150 /** Linearize and convert luminance/intensity to red. */
si_simplify_cb_format(enum pipe_format format)151 enum pipe_format si_simplify_cb_format(enum pipe_format format)
152 {
153    format = util_format_linear(format);
154    format = util_format_luminance_to_red(format);
155    return util_format_intensity_to_red(format);
156 }
157 
vi_alpha_is_on_msb(struct si_screen * sscreen,enum pipe_format format)158 bool vi_alpha_is_on_msb(struct si_screen *sscreen, enum pipe_format format)
159 {
160    if (sscreen->info.gfx_level >= GFX11)
161       return false;
162 
163    format = si_simplify_cb_format(format);
164    const struct util_format_description *desc = util_format_description(format);
165    unsigned comp_swap = si_translate_colorswap(sscreen->info.gfx_level, format, false);
166 
167    /* The following code matches the hw behavior. */
168    if (desc->nr_channels == 1) {
169       return (comp_swap == V_028C70_SWAP_ALT_REV) != (sscreen->info.family == CHIP_RAVEN2 ||
170                                                       sscreen->info.family == CHIP_RENOIR);
171    }
172 
173    return comp_swap != V_028C70_SWAP_STD_REV && comp_swap != V_028C70_SWAP_ALT_REV;
174 }
175 
gfx8_get_dcc_clear_parameters(struct si_screen * sscreen,enum pipe_format base_format,enum pipe_format surface_format,const union pipe_color_union * color,uint32_t * clear_value,bool * eliminate_needed)176 static bool gfx8_get_dcc_clear_parameters(struct si_screen *sscreen, enum pipe_format base_format,
177                                           enum pipe_format surface_format,
178                                           const union pipe_color_union *color, uint32_t *clear_value,
179                                           bool *eliminate_needed)
180 {
181    /* If we want to clear without needing a fast clear eliminate step, we
182     * can set color and alpha independently to 0 or 1 (or 0/max for integer
183     * formats).
184     */
185    bool values[4] = {};      /* whether to clear to 0 or 1 */
186    bool color_value = false; /* clear color to 0 or 1 */
187    bool alpha_value = false; /* clear alpha to 0 or 1 */
188    int alpha_channel;        /* index of the alpha component */
189    bool has_color = false;
190    bool has_alpha = false;
191 
192    const struct util_format_description *desc =
193       util_format_description(si_simplify_cb_format(surface_format));
194 
195    /* 128-bit fast clear with different R,G,B values is unsupported. */
196    if (desc->block.bits == 128 && (color->ui[0] != color->ui[1] || color->ui[0] != color->ui[2]))
197       return false;
198 
199    *eliminate_needed = true;
200    *clear_value = GFX8_DCC_CLEAR_REG;
201 
202    if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
203       return true; /* need ELIMINATE_FAST_CLEAR */
204 
205    bool base_alpha_is_on_msb = vi_alpha_is_on_msb(sscreen, base_format);
206    bool surf_alpha_is_on_msb = vi_alpha_is_on_msb(sscreen, surface_format);
207 
208    /* Formats with 3 channels can't have alpha. */
209    if (desc->nr_channels == 3)
210       alpha_channel = -1;
211    else if (surf_alpha_is_on_msb)
212       alpha_channel = desc->nr_channels - 1;
213    else
214       alpha_channel = 0;
215 
216    for (int i = 0; i < 4; ++i) {
217       if (desc->swizzle[i] >= PIPE_SWIZZLE_0)
218          continue;
219 
220       if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
221          /* Use the maximum value for clamping the clear color. */
222          int max = u_bit_consecutive(0, desc->channel[i].size - 1);
223 
224          values[i] = color->i[i] != 0;
225          if (color->i[i] != 0 && MIN2(color->i[i], max) != max)
226             return true; /* need ELIMINATE_FAST_CLEAR */
227       } else if (desc->channel[i].pure_integer &&
228                  desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
229          /* Use the maximum value for clamping the clear color. */
230          unsigned max = u_bit_consecutive(0, desc->channel[i].size);
231 
232          values[i] = color->ui[i] != 0U;
233          if (color->ui[i] != 0U && MIN2(color->ui[i], max) != max)
234             return true; /* need ELIMINATE_FAST_CLEAR */
235       } else {
236          values[i] = color->f[i] != 0.0F;
237          if (color->f[i] != 0.0F && color->f[i] != 1.0F)
238             return true; /* need ELIMINATE_FAST_CLEAR */
239       }
240 
241       if (desc->swizzle[i] == alpha_channel) {
242          alpha_value = values[i];
243          has_alpha = true;
244       } else {
245          color_value = values[i];
246          has_color = true;
247       }
248    }
249 
250    /* If alpha isn't present, make it the same as color, and vice versa. */
251    if (!has_alpha)
252       alpha_value = color_value;
253    else if (!has_color)
254       color_value = alpha_value;
255 
256    if (color_value != alpha_value && base_alpha_is_on_msb != surf_alpha_is_on_msb)
257       return true; /* require ELIMINATE_FAST_CLEAR */
258 
259    /* Check if all color values are equal if they are present. */
260    for (int i = 0; i < 4; ++i) {
261       if (desc->swizzle[i] <= PIPE_SWIZZLE_W && desc->swizzle[i] != alpha_channel &&
262           values[i] != color_value)
263          return true; /* require ELIMINATE_FAST_CLEAR */
264    }
265 
266    /* This doesn't need ELIMINATE_FAST_CLEAR.
267     * On chips predating Raven2, the DCC clear codes and the CB clear
268     * color registers must match.
269     */
270    *eliminate_needed = false;
271 
272    if (color_value) {
273       if (alpha_value)
274          *clear_value = GFX8_DCC_CLEAR_1111;
275       else
276          *clear_value = GFX8_DCC_CLEAR_1110;
277    } else {
278       if (alpha_value)
279          *clear_value = GFX8_DCC_CLEAR_0001;
280       else
281          *clear_value = GFX8_DCC_CLEAR_0000;
282    }
283    return true;
284 }
285 
gfx11_get_dcc_clear_parameters(struct si_screen * sscreen,enum pipe_format surface_format,const union pipe_color_union * color,uint32_t * clear_value)286 static bool gfx11_get_dcc_clear_parameters(struct si_screen *sscreen, enum pipe_format surface_format,
287                                            const union pipe_color_union *color, uint32_t *clear_value)
288 {
289    const struct util_format_description *desc =
290       util_format_description(si_simplify_cb_format(surface_format));
291    unsigned start_bit = UINT_MAX;
292    unsigned end_bit = 0;
293 
294    /* TODO: 8bpp and 16bpp fast DCC clears don't work. */
295    if (desc->block.bits <= 16)
296       return false;
297 
298    /* Find the used bit range. */
299    for (unsigned i = 0; i < 4; i++) {
300       unsigned swizzle = desc->swizzle[i];
301 
302       if (swizzle >= PIPE_SWIZZLE_0)
303          continue;
304 
305       start_bit = MIN2(start_bit, desc->channel[swizzle].shift);
306       end_bit = MAX2(end_bit, desc->channel[swizzle].shift + desc->channel[swizzle].size);
307    }
308 
309    union {
310       uint8_t ub[16];
311       uint16_t us[8];
312       uint32_t ui[4];
313    } value = {};
314    util_pack_color_union(surface_format, (union util_color*)&value, color);
315 
316    /* Check the cases where all components or bits are either all 0 or all 1. */
317    bool all_bits_are_0 = true;
318    bool all_bits_are_1 = true;
319    bool all_words_are_fp16_1 = false;
320    bool all_words_are_fp32_1 = false;
321 
322    for (unsigned i = start_bit; i < end_bit; i++) {
323       bool bit = value.ub[i / 8] & BITFIELD_BIT(i % 8);
324 
325       all_bits_are_0 &= !bit;
326       all_bits_are_1 &= bit;
327    }
328 
329    if (start_bit % 16 == 0 && end_bit % 16 == 0) {
330       all_words_are_fp16_1 = true;
331       for (unsigned i = start_bit / 16; i < end_bit / 16; i++)
332          all_words_are_fp16_1 &= value.us[i] == 0x3c00;
333    }
334 
335    if (start_bit % 32 == 0 && end_bit % 32 == 0) {
336       all_words_are_fp32_1 = true;
337       for (unsigned i = start_bit / 32; i < end_bit / 32; i++)
338          all_words_are_fp32_1 &= value.ui[i] == 0x3f800000;
339    }
340 
341 #if 0 /* debug code */
342    int i = util_format_get_first_non_void_channel(surface_format);
343    if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED && desc->channel[i].pure_integer) {
344       printf("%i %i %i %i\n", color->i[0], color->i[1], color->i[2], color->i[3]);
345    } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED && desc->channel[i].pure_integer) {
346       printf("%u %u %u %u\n", color->ui[0], color->ui[1], color->ui[2], color->ui[3]);
347    } else {
348       printf("%f %f %f %f\n", color->f[0], color->f[1], color->f[2], color->f[3]);
349    }
350    for (unsigned i = 0; i < end_bit / 8; i++)
351       printf("%02x", value.ub[i]);
352    printf("\n");
353    printf("bits=[%u..%u)%s%s%s%s\n", start_bit, end_bit,
354           all_bits_are_0 ? ", all 0" : "",
355           all_bits_are_1 ? ", all 1" : "",
356           all_words_are_fp16_1 ? ", all fp16 1" : "",
357           all_words_are_fp32_1 ? ", all fp32 1" : "");
358 #endif
359 
360    *clear_value = 0;
361 
362    if (all_bits_are_0 || all_bits_are_1 || all_words_are_fp16_1 || all_words_are_fp32_1) {
363       if (all_bits_are_0)
364          *clear_value = GFX11_DCC_CLEAR_0000;
365       else if (all_bits_are_1)
366          *clear_value = GFX11_DCC_CLEAR_1111_UNORM;
367       else if (all_words_are_fp16_1)
368          *clear_value = GFX11_DCC_CLEAR_1111_FP16;
369       else if (all_words_are_fp32_1)
370          *clear_value = GFX11_DCC_CLEAR_1111_FP32;
371 
372       return true;
373    }
374 
375    /* Check 0001 and 1110 cases. */
376    if (desc->nr_channels == 2 && desc->channel[0].size == 8) {
377       if (value.ub[0] == 0x00 && value.ub[1] == 0xff) {
378          *clear_value = GFX11_DCC_CLEAR_0001_UNORM;
379          return true;
380       } else if (value.ub[0] == 0xff && value.ub[1] == 0x00) {
381          *clear_value = GFX11_DCC_CLEAR_1110_UNORM;
382          return true;
383       }
384    } else if (desc->nr_channels == 4 && desc->channel[0].size == 8) {
385       if (value.ub[0] == 0x00 && value.ub[1] == 0x00 &&
386           value.ub[2] == 0x00 && value.ub[3] == 0xff) {
387          *clear_value = GFX11_DCC_CLEAR_0001_UNORM;
388          return true;
389       } else if (value.ub[0] == 0xff && value.ub[1] == 0xff &&
390                  value.ub[2] == 0xff && value.ub[3] == 0x00) {
391          *clear_value = GFX11_DCC_CLEAR_1110_UNORM;
392          return true;
393       }
394    } else if (desc->nr_channels == 4 && desc->channel[0].size == 16) {
395       if (value.us[0] == 0x0000 && value.us[1] == 0x0000 &&
396           value.us[2] == 0x0000 && value.us[3] == 0xffff) {
397          *clear_value = GFX11_DCC_CLEAR_0001_UNORM;
398          return true;
399       } else if (value.us[0] == 0xffff && value.us[1] == 0xffff &&
400                  value.us[2] == 0xffff && value.us[3] == 0x0000) {
401          *clear_value = GFX11_DCC_CLEAR_1110_UNORM;
402          return true;
403       }
404    }
405 
406    return false;
407 }
408 
vi_dcc_get_clear_info(struct si_context * sctx,struct si_texture * tex,unsigned level,unsigned clear_value,struct si_clear_info * out)409 bool vi_dcc_get_clear_info(struct si_context *sctx, struct si_texture *tex, unsigned level,
410                            unsigned clear_value, struct si_clear_info *out)
411 {
412    struct pipe_resource *dcc_buffer = &tex->buffer.b.b;
413    uint64_t dcc_offset = tex->surface.meta_offset;
414    uint32_t clear_size;
415 
416    assert(vi_dcc_enabled(tex, level));
417 
418    if (sctx->gfx_level >= GFX10) {
419       /* 4x and 8x MSAA needs a sophisticated compute shader for
420        * the clear. GFX11 doesn't need that.
421        */
422       if (sctx->gfx_level < GFX11 && tex->buffer.b.b.nr_storage_samples >= 4)
423          return false;
424 
425       unsigned num_layers = util_num_layers(&tex->buffer.b.b, level);
426 
427       if (num_layers == 1) {
428          /* Clear a specific level. */
429          dcc_offset += tex->surface.u.gfx9.meta_levels[level].offset;
430          clear_size = tex->surface.u.gfx9.meta_levels[level].size;
431       } else if (tex->buffer.b.b.last_level == 0) {
432          /* Clear all layers having only 1 level. */
433          clear_size = tex->surface.meta_size;
434       } else {
435          /* Clearing DCC with both multiple levels and multiple layers is not
436           * implemented.
437           */
438          return false;
439       }
440    } else if (sctx->gfx_level == GFX9) {
441       /* TODO: Implement DCC fast clear for level 0 of mipmapped textures. Mipmapped
442        * DCC has to clear a rectangular area of DCC for level 0 (because the whole miptree
443        * is organized in a 2D plane).
444        */
445       if (tex->buffer.b.b.last_level > 0)
446          return false;
447 
448       /* 4x and 8x MSAA need to clear only sample 0 and 1 in a compute shader and leave other
449        * samples untouched. (only the first 2 samples are compressed) */
450       if (tex->buffer.b.b.nr_storage_samples >= 4) {
451          si_init_buffer_clear(out, dcc_buffer, 0, 0, clear_value);
452          out->is_dcc_msaa = true;
453          return true;
454       }
455 
456       clear_size = tex->surface.meta_size;
457    } else {
458       unsigned num_layers = util_num_layers(&tex->buffer.b.b, level);
459 
460       /* If this is 0, fast clear isn't possible. (can occur with MSAA) */
461       if (!tex->surface.u.legacy.color.dcc_level[level].dcc_fast_clear_size)
462          return false;
463 
464       /* Layered 4x and 8x MSAA DCC fast clears need to clear
465        * dcc_fast_clear_size bytes for each layer. A compute shader
466        * would be more efficient than separate per-layer clear operations.
467        */
468       if (tex->buffer.b.b.nr_storage_samples >= 4 && num_layers > 1)
469          return false;
470 
471       dcc_offset += tex->surface.u.legacy.color.dcc_level[level].dcc_offset;
472       clear_size = tex->surface.u.legacy.color.dcc_level[level].dcc_fast_clear_size;
473    }
474 
475    si_init_buffer_clear(out, dcc_buffer, dcc_offset, clear_size, clear_value);
476    return true;
477 }
478 
479 /* Set the same micro tile mode as the destination of the last MSAA resolve.
480  * This allows hitting the MSAA resolve fast path, which requires that both
481  * src and dst micro tile modes match.
482  */
si_set_optimal_micro_tile_mode(struct si_screen * sscreen,struct si_texture * tex)483 static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen, struct si_texture *tex)
484 {
485    if (sscreen->info.gfx_level >= GFX10 || tex->buffer.b.is_shared ||
486        tex->buffer.b.b.nr_samples <= 1 ||
487        tex->surface.micro_tile_mode == tex->last_msaa_resolve_target_micro_mode)
488       return;
489 
490    assert(sscreen->info.gfx_level >= GFX9 ||
491           tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
492    assert(tex->buffer.b.b.last_level == 0);
493 
494    if (sscreen->info.gfx_level >= GFX9) {
495       /* 4K or larger tiles only. 0 is linear. 1-3 are 256B tiles. */
496       assert(tex->surface.u.gfx9.swizzle_mode >= 4);
497 
498       /* If you do swizzle_mode % 4, you'll get:
499        *   0 = Depth
500        *   1 = Standard,
501        *   2 = Displayable
502        *   3 = Rotated
503        *
504        * Depth-sample order isn't allowed:
505        */
506       assert(tex->surface.u.gfx9.swizzle_mode % 4 != 0);
507 
508       switch (tex->last_msaa_resolve_target_micro_mode) {
509       case RADEON_MICRO_MODE_DISPLAY:
510          tex->surface.u.gfx9.swizzle_mode &= ~0x3;
511          tex->surface.u.gfx9.swizzle_mode += 2; /* D */
512          break;
513       case RADEON_MICRO_MODE_STANDARD:
514          tex->surface.u.gfx9.swizzle_mode &= ~0x3;
515          tex->surface.u.gfx9.swizzle_mode += 1; /* S */
516          break;
517       case RADEON_MICRO_MODE_RENDER:
518          tex->surface.u.gfx9.swizzle_mode &= ~0x3;
519          tex->surface.u.gfx9.swizzle_mode += 3; /* R */
520          break;
521       default: /* depth */
522          assert(!"unexpected micro mode");
523          return;
524       }
525    } else if (sscreen->info.gfx_level >= GFX7) {
526       /* These magic numbers were copied from addrlib. It doesn't use
527        * any definitions for them either. They are all 2D_TILED_THIN1
528        * modes with different bpp and micro tile mode.
529        */
530       switch (tex->last_msaa_resolve_target_micro_mode) {
531       case RADEON_MICRO_MODE_DISPLAY:
532          tex->surface.u.legacy.tiling_index[0] = 10;
533          break;
534       case RADEON_MICRO_MODE_STANDARD:
535          tex->surface.u.legacy.tiling_index[0] = 14;
536          break;
537       case RADEON_MICRO_MODE_RENDER:
538          tex->surface.u.legacy.tiling_index[0] = 28;
539          break;
540       default: /* depth, thick */
541          assert(!"unexpected micro mode");
542          return;
543       }
544    } else { /* GFX6 */
545       switch (tex->last_msaa_resolve_target_micro_mode) {
546       case RADEON_MICRO_MODE_DISPLAY:
547          switch (tex->surface.bpe) {
548          case 1:
549             tex->surface.u.legacy.tiling_index[0] = 10;
550             break;
551          case 2:
552             tex->surface.u.legacy.tiling_index[0] = 11;
553             break;
554          default: /* 4, 8 */
555             tex->surface.u.legacy.tiling_index[0] = 12;
556             break;
557          }
558          break;
559       case RADEON_MICRO_MODE_STANDARD:
560          switch (tex->surface.bpe) {
561          case 1:
562             tex->surface.u.legacy.tiling_index[0] = 14;
563             break;
564          case 2:
565             tex->surface.u.legacy.tiling_index[0] = 15;
566             break;
567          case 4:
568             tex->surface.u.legacy.tiling_index[0] = 16;
569             break;
570          default: /* 8, 16 */
571             tex->surface.u.legacy.tiling_index[0] = 17;
572             break;
573          }
574          break;
575       default: /* depth, thick */
576          assert(!"unexpected micro mode");
577          return;
578       }
579    }
580 
581    tex->surface.micro_tile_mode = tex->last_msaa_resolve_target_micro_mode;
582 
583    p_atomic_inc(&sscreen->dirty_tex_counter);
584 }
585 
si_get_htile_clear_value(struct si_texture * tex,float depth)586 static uint32_t si_get_htile_clear_value(struct si_texture *tex, float depth)
587 {
588    /* Maximum 14-bit UINT value. */
589    const uint32_t max_z_value = 0x3FFF;
590 
591    /* For clears, Zmask and Smem will always be set to zero. */
592    const uint32_t zmask = 0;
593    const uint32_t smem  = 0;
594 
595    /* Convert depthValue to 14-bit zmin/zmax uint values. */
596    const uint32_t zmin = lroundf(depth * max_z_value);
597    const uint32_t zmax = zmin;
598 
599    if (tex->htile_stencil_disabled) {
600       /* Z-only HTILE is laid out as follows:
601        * |31     18|17      4|3     0|
602        * +---------+---------+-------+
603        * |  Max Z  |  Min Z  | ZMask |
604        */
605       return ((zmax & 0x3FFF) << 18) |
606              ((zmin & 0x3FFF) << 4) |
607              ((zmask & 0xF) << 0);
608    } else {
609       /* Z+S HTILE is laid out as-follows:
610        * |31       12|11 10|9    8|7   6|5   4|3     0|
611        * +-----------+-----+------+-----+-----+-------+
612        * |  Z Range  |     | SMem | SR1 | SR0 | ZMask |
613        *
614        * The base value for zRange is either zMax or zMin, depending on ZRANGE_PRECISION.
615        * For a fast clear, zMin == zMax == clearValue. This means that the base will
616        * always be the clear value (converted to 14-bit UINT).
617        *
618        * When abs(zMax-zMin) < 16, the delta is equal to the difference. In the case of
619        * fast clears, where zMax == zMin, the delta is always zero.
620        */
621       const uint32_t delta = 0;
622       const uint32_t zrange = (zmax << 6) | delta;
623 
624       /* SResults 0 & 1 are set based on the stencil compare state.
625        * For fast-clear, the default value of sr0 and sr1 are both 0x3.
626        */
627       const uint32_t sresults = 0xf;
628 
629       return ((zrange & 0xFFFFF) << 12) |
630              ((smem & 0x3) <<  8) |
631              ((sresults & 0xF) <<  4) |
632              ((zmask & 0xF) <<  0);
633    }
634 }
635 
si_can_fast_clear_depth(struct si_texture * zstex,unsigned level,float depth,unsigned buffers)636 static bool si_can_fast_clear_depth(struct si_texture *zstex, unsigned level, float depth,
637                                     unsigned buffers)
638 {
639    /* TC-compatible HTILE only supports depth clears to 0 or 1. */
640    return buffers & PIPE_CLEAR_DEPTH &&
641           si_htile_enabled(zstex, level, PIPE_MASK_Z) &&
642           (!zstex->tc_compatible_htile || depth == 0 || depth == 1);
643 }
644 
si_can_fast_clear_stencil(struct si_texture * zstex,unsigned level,uint8_t stencil,unsigned buffers)645 static bool si_can_fast_clear_stencil(struct si_texture *zstex, unsigned level, uint8_t stencil,
646                                       unsigned buffers)
647 {
648    /* TC-compatible HTILE only supports stencil clears to 0. */
649    return buffers & PIPE_CLEAR_STENCIL &&
650           si_htile_enabled(zstex, level, PIPE_MASK_S) &&
651           (!zstex->tc_compatible_htile || stencil == 0);
652 }
653 
si_fast_clear(struct si_context * sctx,unsigned * buffers,const union pipe_color_union * color,float depth,uint8_t stencil)654 static void si_fast_clear(struct si_context *sctx, unsigned *buffers,
655                           const union pipe_color_union *color, float depth, uint8_t stencil)
656 {
657    struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
658    struct si_clear_info info[8 * 2 + 1]; /* MRTs * (CMASK + DCC) + ZS */
659    unsigned num_clears = 0;
660    unsigned clear_types = 0;
661    unsigned num_pixels = fb->width * fb->height;
662 
663    /* This function is broken in BE, so just disable this path for now */
664 #if UTIL_ARCH_BIG_ENDIAN
665    return;
666 #endif
667 
668    if (sctx->render_cond)
669       return;
670 
671    /* Gather information about what to clear. */
672    unsigned color_buffer_mask = (*buffers & PIPE_CLEAR_COLOR) >> util_logbase2(PIPE_CLEAR_COLOR0);
673    while (color_buffer_mask) {
674       unsigned i = u_bit_scan(&color_buffer_mask);
675 
676       struct si_texture *tex = (struct si_texture *)fb->cbufs[i]->texture;
677       unsigned level = fb->cbufs[i]->u.tex.level;
678       unsigned num_layers = util_num_layers(&tex->buffer.b.b, level);
679 
680       /* the clear is allowed if all layers are bound */
681       if (fb->cbufs[i]->u.tex.first_layer != 0 ||
682           fb->cbufs[i]->u.tex.last_layer != num_layers - 1) {
683          continue;
684       }
685 
686       /* We can change the micro tile mode before a full clear. */
687       /* This is only used for MSAA textures when clearing all layers. */
688       si_set_optimal_micro_tile_mode(sctx->screen, tex);
689 
690       if (tex->swap_rgb_to_bgr_on_next_clear) {
691          assert(!tex->swap_rgb_to_bgr);
692          assert(tex->buffer.b.b.nr_samples >= 2);
693          tex->swap_rgb_to_bgr = true;
694          tex->swap_rgb_to_bgr_on_next_clear = false;
695 
696          /* Update all sampler views and images. */
697          p_atomic_inc(&sctx->screen->dirty_tex_counter);
698       }
699 
700       /* only supported on tiled surfaces */
701       if (tex->surface.is_linear) {
702          continue;
703       }
704 
705       /* Use a slow clear for small surfaces where the cost of
706        * the eliminate pass can be higher than the benefit of fast
707        * clear. The closed driver does this, but the numbers may differ.
708        *
709        * This helps on both dGPUs and APUs, even small APUs like Mullins.
710        */
711       bool fb_too_small = (uint64_t)num_pixels * num_layers <= 512 * 512;
712       bool too_small = tex->buffer.b.b.nr_samples <= 1 && fb_too_small;
713       bool eliminate_needed = false;
714       bool fmask_decompress_needed = false;
715       bool need_dirtying_fb = false;
716 
717       /* Try to clear DCC first, otherwise try CMASK. */
718       if (vi_dcc_enabled(tex, level)) {
719          uint32_t reset_value;
720 
721          if (sctx->screen->debug_flags & DBG(NO_DCC_CLEAR))
722             continue;
723 
724          if (sctx->gfx_level >= GFX11) {
725             if (!gfx11_get_dcc_clear_parameters(sctx->screen, fb->cbufs[i]->format, color,
726                                                 &reset_value))
727                continue;
728          } else {
729             if (!gfx8_get_dcc_clear_parameters(sctx->screen, tex->buffer.b.b.format,
730                                                fb->cbufs[i]->format, color, &reset_value,
731                                                &eliminate_needed))
732                continue;
733          }
734 
735          /* Shared textures can't use fast clear without an explicit flush
736           * because the clear color is not exported.
737           *
738           * Chips without DCC constant encoding must set the clear color registers
739           * correctly even if the fast clear eliminate pass is not needed.
740           */
741          if ((eliminate_needed || !sctx->screen->info.has_dcc_constant_encode) &&
742              tex->buffer.b.is_shared &&
743              !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
744             continue;
745 
746          if (eliminate_needed && too_small)
747             continue;
748 
749          /* We can clear any level, but we only set up the clear value registers for the first
750           * level. Therefore, all other levels can be cleared only if the clear value registers
751           * are not used, which is only the case with DCC constant encoding and 0/1 clear values.
752           */
753          if (level > 0 && (eliminate_needed || !sctx->screen->info.has_dcc_constant_encode))
754             continue;
755 
756          if (tex->buffer.b.b.nr_samples >= 2 && eliminate_needed &&
757              !sctx->screen->allow_dcc_msaa_clear_to_reg_for_bpp[util_logbase2(tex->surface.bpe)])
758             continue;
759 
760          assert(num_clears < ARRAY_SIZE(info));
761 
762          if (!vi_dcc_get_clear_info(sctx, tex, level, reset_value, &info[num_clears]))
763             continue;
764 
765          num_clears++;
766          clear_types |= SI_CLEAR_TYPE_DCC;
767 
768          si_mark_display_dcc_dirty(sctx, tex);
769 
770          /* DCC fast clear with MSAA should clear CMASK to 0xC. */
771          if (tex->buffer.b.b.nr_samples >= 2 && tex->cmask_buffer) {
772             assert(sctx->gfx_level < GFX11); /* no FMASK/CMASK on GFX11 */
773             assert(num_clears < ARRAY_SIZE(info));
774             si_init_buffer_clear(&info[num_clears++], &tex->cmask_buffer->b.b,
775                                  tex->surface.cmask_offset, tex->surface.cmask_size, 0xCCCCCCCC);
776             clear_types |= SI_CLEAR_TYPE_CMASK;
777             fmask_decompress_needed = true;
778          }
779       } else {
780          /* No CMASK on GFX11. */
781          if (sctx->gfx_level >= GFX11)
782             continue;
783 
784          if (level > 0)
785             continue;
786 
787          /* Shared textures can't use fast clear without an explicit flush
788           * because the clear color is not exported.
789           */
790          if (tex->buffer.b.is_shared &&
791              !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
792             continue;
793 
794          if (too_small)
795             continue;
796 
797          /* 128-bit formats are unsupported */
798          if (tex->surface.bpe > 8) {
799             continue;
800          }
801 
802          /* RB+ doesn't work with CMASK fast clear on Stoney. */
803          if (sctx->family == CHIP_STONEY)
804             continue;
805 
806          /* Disable fast clear if tex is encrypted */
807          if (tex->buffer.flags & RADEON_FLAG_ENCRYPTED)
808             continue;
809 
810          uint64_t cmask_offset = 0;
811          unsigned clear_size = 0;
812          bool had_cmask_buffer = tex->cmask_buffer != NULL;
813 
814          if (sctx->gfx_level >= GFX10) {
815             assert(level == 0);
816 
817             /* Clearing CMASK with both multiple levels and multiple layers is not
818              * implemented.
819              */
820             if (num_layers > 1 && tex->buffer.b.b.last_level > 0)
821                continue;
822 
823             if (!si_alloc_separate_cmask(sctx->screen, tex))
824                continue;
825 
826             if (num_layers == 1) {
827                /* Clear level 0. */
828                cmask_offset = tex->surface.cmask_offset + tex->surface.u.gfx9.color.cmask_level0.offset;
829                clear_size = tex->surface.u.gfx9.color.cmask_level0.size;
830             } else if (tex->buffer.b.b.last_level == 0) {
831                /* Clear all layers having only 1 level. */
832                cmask_offset = tex->surface.cmask_offset;
833                clear_size = tex->surface.cmask_size;
834             } else {
835                assert(0); /* this is prevented above */
836             }
837          } else if (sctx->gfx_level == GFX9) {
838             /* TODO: Implement CMASK fast clear for level 0 of mipmapped textures. Mipmapped
839              * CMASK has to clear a rectangular area of CMASK for level 0 (because the whole
840              * miptree is organized in a 2D plane).
841              */
842             if (tex->buffer.b.b.last_level > 0)
843                continue;
844 
845             if (!si_alloc_separate_cmask(sctx->screen, tex))
846                continue;
847 
848             cmask_offset = tex->surface.cmask_offset;
849             clear_size = tex->surface.cmask_size;
850          } else {
851             if (!si_alloc_separate_cmask(sctx->screen, tex))
852                continue;
853 
854             /* GFX6-8: This only covers mipmap level 0. */
855             cmask_offset = tex->surface.cmask_offset;
856             clear_size = tex->surface.cmask_size;
857          }
858 
859          /* Do the fast clear. */
860          assert(num_clears < ARRAY_SIZE(info));
861          si_init_buffer_clear(&info[num_clears++], &tex->cmask_buffer->b.b,
862                               cmask_offset, clear_size, 0);
863          clear_types |= SI_CLEAR_TYPE_CMASK;
864          eliminate_needed = true;
865          /* If we allocated a cmask buffer for this tex we need to re-emit
866           * the fb state.
867           */
868          need_dirtying_fb = !had_cmask_buffer;
869       }
870 
871       if ((eliminate_needed || fmask_decompress_needed) &&
872           !(tex->dirty_level_mask & (1 << level))) {
873          assert(sctx->gfx_level < GFX11); /* no decompression needed on GFX11 */
874          tex->dirty_level_mask |= 1 << level;
875          p_atomic_inc(&sctx->screen->compressed_colortex_counter);
876       }
877 
878       *buffers &= ~(PIPE_CLEAR_COLOR0 << i);
879 
880       /* Chips with DCC constant encoding don't need to set the clear
881        * color registers for DCC clear values 0 and 1.
882        */
883       if (sctx->screen->info.has_dcc_constant_encode && !eliminate_needed)
884          continue;
885 
886       /* There are no clear color registers on GFX11. */
887       assert(sctx->gfx_level < GFX11);
888 
889       if (si_set_clear_color(tex, fb->cbufs[i]->format, color) || need_dirtying_fb) {
890          sctx->framebuffer.dirty_cbufs |= 1 << i;
891          si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
892       }
893    }
894 
895    /* Depth/stencil clears. */
896    struct pipe_surface *zsbuf = fb->zsbuf;
897    struct si_texture *zstex = zsbuf ? (struct si_texture *)zsbuf->texture : NULL;
898    unsigned zs_num_layers = zstex ? util_num_layers(&zstex->buffer.b.b, zsbuf->u.tex.level) : 0;
899 
900    if (zstex && zsbuf->u.tex.first_layer == 0 &&
901        zsbuf->u.tex.last_layer == zs_num_layers - 1 &&
902        si_htile_enabled(zstex, zsbuf->u.tex.level, PIPE_MASK_ZS)) {
903       unsigned level = zsbuf->u.tex.level;
904       bool update_db_depth_clear = false;
905       bool update_db_stencil_clear = false;
906       bool fb_too_small = num_pixels * zs_num_layers <= 512 * 512;
907 
908       /* Transition from TC-incompatible to TC-compatible HTILE if requested. */
909       if (zstex->enable_tc_compatible_htile_next_clear) {
910           /* If both depth and stencil are present, they must be cleared together. */
911          if ((*buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL ||
912              (*buffers & PIPE_CLEAR_DEPTH && (!zstex->surface.has_stencil ||
913                                               zstex->htile_stencil_disabled))) {
914             /* The conversion from TC-incompatible to TC-compatible can only be done in one clear. */
915             assert(zstex->buffer.b.b.last_level == 0);
916             assert(!zstex->tc_compatible_htile);
917 
918             /* Enable TC-compatible HTILE. */
919             zstex->enable_tc_compatible_htile_next_clear = false;
920             zstex->tc_compatible_htile = true;
921 
922             /* Update the framebuffer state to reflect the change. */
923             sctx->framebuffer.DB_has_shader_readable_metadata = true;
924             sctx->framebuffer.dirty_zsbuf = true;
925             si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
926 
927             /* Update all sampler views and shader images in all contexts. */
928             p_atomic_inc(&sctx->screen->dirty_tex_counter);
929 
930             /* Perform the clear here if possible, else clear to uncompressed. */
931             uint32_t clear_value;
932 
933             if (zstex->htile_stencil_disabled || !zstex->surface.has_stencil) {
934                if (si_can_fast_clear_depth(zstex, level, depth, *buffers)) {
935                   /* Z-only clear. */
936                   clear_value = si_get_htile_clear_value(zstex, depth);
937                   *buffers &= ~PIPE_CLEAR_DEPTH;
938                   zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level);
939                   zstex->depth_cleared_level_mask |= BITFIELD_BIT(level);
940                   update_db_depth_clear = true;
941                }
942             } else if ((*buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) {
943                if (si_can_fast_clear_depth(zstex, level, depth, *buffers) &&
944                    si_can_fast_clear_stencil(zstex, level, stencil, *buffers)) {
945                   /* Combined Z+S clear. */
946                   clear_value = si_get_htile_clear_value(zstex, depth);
947                   *buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
948                   zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level);
949                   zstex->depth_cleared_level_mask |= BITFIELD_BIT(level);
950                   zstex->stencil_cleared_level_mask_once |= BITFIELD_BIT(level);
951                   update_db_depth_clear = true;
952                   update_db_stencil_clear = true;
953                }
954             }
955 
956             if (!update_db_depth_clear) {
957                /* Clear to uncompressed, so that it doesn't contain values incompatible
958                 * with the new TC-compatible HTILE setting.
959                 *
960                 * 0xfffff30f = uncompressed Z + S
961                 * 0xfffc000f = uncompressed Z only
962                 */
963                clear_value = !zstex->htile_stencil_disabled ? 0xfffff30f : 0xfffc000f;
964             }
965 
966             zstex->need_flush_after_depth_decompression = sctx->gfx_level == GFX10_3;
967 
968             assert(num_clears < ARRAY_SIZE(info));
969             si_init_buffer_clear(&info[num_clears++], &zstex->buffer.b.b,
970                                  zstex->surface.meta_offset, zstex->surface.meta_size, clear_value);
971             clear_types |= SI_CLEAR_TYPE_HTILE;
972          }
973       } else if (num_clears || !fb_too_small) {
974          /* This is where the HTILE buffer clear is done.
975           *
976           * If there is no clear scheduled and the framebuffer size is too small, we should use
977           * the draw-based clear that is without waits. If there is some other clear scheduled,
978           * we will have to wait anyway, so add the HTILE buffer clear to the batch here.
979           * If the framebuffer size is large enough, use this codepath too.
980           */
981          uint64_t htile_offset = zstex->surface.meta_offset;
982          unsigned htile_size = 0;
983 
984          /* Determine the HTILE subset to clear. */
985          if (sctx->gfx_level >= GFX10) {
986             /* This can only clear a layered texture with 1 level or a mipmap texture
987              * with 1 layer. Other cases are unimplemented.
988              */
989             if (zs_num_layers == 1) {
990                /* Clear a specific level. */
991                htile_offset += zstex->surface.u.gfx9.meta_levels[level].offset;
992                htile_size = zstex->surface.u.gfx9.meta_levels[level].size;
993             } else if (zstex->buffer.b.b.last_level == 0) {
994                /* Clear all layers having only 1 level. */
995                htile_size = zstex->surface.meta_size;
996             }
997          } else {
998             /* This can only clear a layered texture with 1 level. Other cases are
999              * unimplemented.
1000              */
1001             if (zstex->buffer.b.b.last_level == 0)
1002                htile_size = zstex->surface.meta_size;
1003          }
1004 
1005          /* Perform the clear if it's possible. */
1006          if (zstex->htile_stencil_disabled || !zstex->surface.has_stencil) {
1007             if (htile_size &&
1008                 si_can_fast_clear_depth(zstex, level, depth, *buffers)) {
1009                /* Z-only clear. */
1010                assert(num_clears < ARRAY_SIZE(info));
1011                si_init_buffer_clear(&info[num_clears++], &zstex->buffer.b.b, htile_offset,
1012                                     htile_size, si_get_htile_clear_value(zstex, depth));
1013                clear_types |= SI_CLEAR_TYPE_HTILE;
1014                *buffers &= ~PIPE_CLEAR_DEPTH;
1015                zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level);
1016                zstex->depth_cleared_level_mask |= BITFIELD_BIT(level);
1017                update_db_depth_clear = true;
1018             }
1019          } else if ((*buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) {
1020             if (htile_size &&
1021                 si_can_fast_clear_depth(zstex, level, depth, *buffers) &&
1022                 si_can_fast_clear_stencil(zstex, level, stencil, *buffers)) {
1023                /* Combined Z+S clear. */
1024                assert(num_clears < ARRAY_SIZE(info));
1025                si_init_buffer_clear(&info[num_clears++], &zstex->buffer.b.b, htile_offset,
1026                                     htile_size, si_get_htile_clear_value(zstex, depth));
1027                clear_types |= SI_CLEAR_TYPE_HTILE;
1028                *buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
1029                zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level);
1030                zstex->depth_cleared_level_mask |= BITFIELD_BIT(level);
1031                zstex->stencil_cleared_level_mask_once |= BITFIELD_BIT(level);
1032                update_db_depth_clear = true;
1033                update_db_stencil_clear = true;
1034             }
1035          } else {
1036             /* Z-only or S-only clear when both Z/S are present using a read-modify-write
1037              * compute shader.
1038              *
1039              * If we get both clears but only one of them can be fast-cleared, we use
1040              * the draw-based fast clear to do both at the same time.
1041              */
1042             const uint32_t htile_depth_writemask = 0xfffffc0f;
1043             const uint32_t htile_stencil_writemask = 0x000003f0;
1044 
1045             if (htile_size &&
1046                 !(*buffers & PIPE_CLEAR_STENCIL) &&
1047                 si_can_fast_clear_depth(zstex, level, depth, *buffers)) {
1048                /* Z-only clear with stencil left intact. */
1049                assert(num_clears < ARRAY_SIZE(info));
1050                si_init_buffer_clear_rmw(&info[num_clears++], &zstex->buffer.b.b, htile_offset,
1051                                         htile_size, si_get_htile_clear_value(zstex, depth),
1052                                         htile_depth_writemask);
1053                clear_types |= SI_CLEAR_TYPE_HTILE;
1054                *buffers &= ~PIPE_CLEAR_DEPTH;
1055                zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level);
1056                zstex->depth_cleared_level_mask |= BITFIELD_BIT(level);
1057                update_db_depth_clear = true;
1058             } else if (htile_size &&
1059                        !(*buffers & PIPE_CLEAR_DEPTH) &&
1060                        si_can_fast_clear_stencil(zstex, level, stencil, *buffers)) {
1061                /* Stencil-only clear with depth left intact. */
1062                assert(num_clears < ARRAY_SIZE(info));
1063                si_init_buffer_clear_rmw(&info[num_clears++], &zstex->buffer.b.b, htile_offset,
1064                                         htile_size, si_get_htile_clear_value(zstex, depth),
1065                                         htile_stencil_writemask);
1066                clear_types |= SI_CLEAR_TYPE_HTILE;
1067                *buffers &= ~PIPE_CLEAR_STENCIL;
1068                zstex->stencil_cleared_level_mask_once |= BITFIELD_BIT(level);
1069                update_db_stencil_clear = true;
1070             }
1071          }
1072 
1073          zstex->need_flush_after_depth_decompression = update_db_depth_clear && sctx->gfx_level == GFX10_3;
1074       }
1075 
1076       /* Update DB_DEPTH_CLEAR. */
1077       if (update_db_depth_clear &&
1078           zstex->depth_clear_value[level] != (float)depth) {
1079          zstex->depth_clear_value[level] = depth;
1080          sctx->framebuffer.dirty_zsbuf = true;
1081          si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
1082       }
1083 
1084       /* Update DB_STENCIL_CLEAR. */
1085       if (update_db_stencil_clear &&
1086           zstex->stencil_clear_value[level] != stencil) {
1087          zstex->stencil_clear_value[level] = stencil;
1088          sctx->framebuffer.dirty_zsbuf = true;
1089          si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
1090       }
1091    }
1092 
1093    si_execute_clears(sctx, info, num_clears, clear_types);
1094 }
1095 
si_clear(struct pipe_context * ctx,unsigned buffers,const struct pipe_scissor_state * scissor_state,const union pipe_color_union * color,double depth,unsigned stencil)1096 static void si_clear(struct pipe_context *ctx, unsigned buffers,
1097                      const struct pipe_scissor_state *scissor_state,
1098                      const union pipe_color_union *color, double depth, unsigned stencil)
1099 {
1100    struct si_context *sctx = (struct si_context *)ctx;
1101    struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
1102    struct pipe_surface *zsbuf = fb->zsbuf;
1103    struct si_texture *zstex = zsbuf ? (struct si_texture *)zsbuf->texture : NULL;
1104    bool needs_db_flush = false;
1105 
1106    /* Unset clear flags for non-existent buffers. */
1107    for (unsigned i = 0; i < 8; i++) {
1108       if (i >= fb->nr_cbufs || !fb->cbufs[i])
1109          buffers &= ~(PIPE_CLEAR_COLOR0 << i);
1110    }
1111    if (!zsbuf)
1112       buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
1113    else if (!util_format_has_stencil(util_format_description(zsbuf->format)))
1114       buffers &= ~PIPE_CLEAR_STENCIL;
1115 
1116    si_fast_clear(sctx, &buffers, color, depth, stencil);
1117    if (!buffers)
1118       return; /* all buffers have been cleared */
1119 
1120    if (buffers & PIPE_CLEAR_COLOR) {
1121       /* These buffers cannot use fast clear, make sure to disable expansion. */
1122       unsigned color_buffer_mask = (buffers & PIPE_CLEAR_COLOR) >> util_logbase2(PIPE_CLEAR_COLOR0);
1123       while (color_buffer_mask) {
1124          unsigned i = u_bit_scan(&color_buffer_mask);
1125          struct si_texture *tex = (struct si_texture *)fb->cbufs[i]->texture;
1126          if (tex->surface.fmask_size == 0)
1127             tex->dirty_level_mask &= ~(1 << fb->cbufs[i]->u.tex.level);
1128       }
1129    }
1130 
1131    if (zstex && zsbuf->u.tex.first_layer == 0 &&
1132        zsbuf->u.tex.last_layer == util_max_layer(&zstex->buffer.b.b, 0)) {
1133       unsigned level = zsbuf->u.tex.level;
1134 
1135       if (si_can_fast_clear_depth(zstex, level, depth, buffers)) {
1136          /* Need to disable EXPCLEAR temporarily if clearing
1137           * to a new value. */
1138          if (!(zstex->depth_cleared_level_mask_once & BITFIELD_BIT(level)) ||
1139              zstex->depth_clear_value[level] != depth) {
1140             sctx->db_depth_disable_expclear = true;
1141          }
1142 
1143          if (zstex->depth_clear_value[level] != (float)depth) {
1144             if ((zstex->depth_clear_value[level] != 0) != (depth != 0)) {
1145                /* ZRANGE_PRECISION register of a bound surface will change so we
1146                 * must flush the DB caches. */
1147                needs_db_flush = true;
1148             }
1149             /* Update DB_DEPTH_CLEAR. */
1150             zstex->depth_clear_value[level] = depth;
1151             sctx->framebuffer.dirty_zsbuf = true;
1152             si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
1153          }
1154          sctx->db_depth_clear = true;
1155          si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
1156       }
1157 
1158       if (si_can_fast_clear_stencil(zstex, level, stencil, buffers)) {
1159          stencil &= 0xff;
1160 
1161          /* Need to disable EXPCLEAR temporarily if clearing
1162           * to a new value. */
1163          if (!(zstex->stencil_cleared_level_mask_once & BITFIELD_BIT(level)) ||
1164              zstex->stencil_clear_value[level] != stencil) {
1165             sctx->db_stencil_disable_expclear = true;
1166          }
1167 
1168          if (zstex->stencil_clear_value[level] != (uint8_t)stencil) {
1169             /* Update DB_STENCIL_CLEAR. */
1170             zstex->stencil_clear_value[level] = stencil;
1171             sctx->framebuffer.dirty_zsbuf = true;
1172             si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
1173          }
1174          sctx->db_stencil_clear = true;
1175          si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
1176       }
1177 
1178       /* TODO: This hack fixes dEQP-GLES[23].functional.fragment_ops.random.* on Navi31.
1179        * The root cause is unknown.
1180        */
1181       if (sctx->gfx_level == GFX11 || sctx->gfx_level == GFX11_5)
1182          needs_db_flush = true;
1183 
1184       if (needs_db_flush) {
1185          sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
1186          si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
1187       }
1188    }
1189 
1190    if (unlikely(sctx->sqtt_enabled)) {
1191       if (buffers & PIPE_CLEAR_COLOR)
1192          sctx->sqtt_next_event = EventCmdClearColorImage;
1193       else if (buffers & PIPE_CLEAR_DEPTHSTENCIL)
1194          sctx->sqtt_next_event = EventCmdClearDepthStencilImage;
1195    }
1196 
1197    si_blitter_begin(sctx, SI_CLEAR);
1198    util_blitter_clear(sctx->blitter, fb->width, fb->height, util_framebuffer_get_num_layers(fb),
1199                       buffers, color, depth, stencil, sctx->framebuffer.nr_samples > 1);
1200    si_blitter_end(sctx);
1201 
1202    if (sctx->db_depth_clear) {
1203       sctx->db_depth_clear = false;
1204       sctx->db_depth_disable_expclear = false;
1205       zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(zsbuf->u.tex.level);
1206       zstex->depth_cleared_level_mask |= BITFIELD_BIT(zsbuf->u.tex.level);
1207       si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
1208    }
1209 
1210    if (sctx->db_stencil_clear) {
1211       sctx->db_stencil_clear = false;
1212       sctx->db_stencil_disable_expclear = false;
1213       zstex->stencil_cleared_level_mask_once |= BITFIELD_BIT(zsbuf->u.tex.level);
1214       si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
1215    }
1216 }
1217 
/**
 * Try to clear a surface by temporarily binding it as the only framebuffer
 * attachment and calling pipe_context::clear on it.
 *
 * This is attempted only for whole-image clears covering all layers, with a
 * render condition state that pipe_context::clear can honor, and on contexts
 * that have graphics. The previous framebuffer state is restored afterwards.
 *
 * \return true if the clear was issued, false if the caller must clear another way.
 */
static bool si_try_normal_clear(struct si_context *sctx, struct pipe_surface *dst,
                                unsigned dstx, unsigned dsty, unsigned width, unsigned height,
                                bool render_condition_enabled, unsigned buffers,
                                const union pipe_color_union *color,
                                float depth, unsigned stencil)
{
   /* This is worth it only if it's a whole image clear, so that we just clear DCC/HTILE. */
   if (dstx != 0 || dsty != 0)
      return false;

   if (width != dst->width || height != dst->height)
      return false;

   if (dst->u.tex.first_layer != 0 ||
       dst->u.tex.last_layer != util_max_layer(dst->texture, dst->u.tex.level))
      return false;

   /* pipe->clear honors render_condition, so it's usable only when no render
    * condition is set, or when one is set and it is meant to be enabled.
    */
   if (sctx->render_cond && !render_condition_enabled)
      return false;

   if (!sctx->has_graphics)
      return false;

   struct pipe_context *pctx = &sctx->b;
   struct pipe_framebuffer_state saved_fb = {}, tmp_fb = {};

   /* Keep a reference to the current framebuffer so it can be rebound below. */
   util_copy_framebuffer_state(&saved_fb, &sctx->framebuffer.state);

   tmp_fb.width = dst->width;
   tmp_fb.height = dst->height;

   /* Bind the surface either as color buffer 0 or as the depth/stencil buffer. */
   if (buffers & PIPE_CLEAR_COLOR) {
      tmp_fb.nr_cbufs = 1;
      tmp_fb.cbufs[0] = dst;
   } else {
      tmp_fb.zsbuf = dst;
   }

   pctx->set_framebuffer_state(pctx, &tmp_fb);
   pctx->clear(pctx, buffers, NULL, color, depth, stencil);
   pctx->set_framebuffer_state(pctx, &saved_fb);

   /* Release the saved copy. */
   util_copy_framebuffer_state(&saved_fb, NULL);

   return true;
}
1259 
/**
 * pipe_context::clear_render_target implementation.
 *
 * Tries, in order: the framebuffer-based clear (si_try_normal_clear), the compute
 * clear, and finally the blitter.
 */
static void si_clear_render_target(struct pipe_context *ctx, struct pipe_surface *dst,
                                   const union pipe_color_union *color, unsigned dstx,
                                   unsigned dsty, unsigned width, unsigned height,
                                   bool render_condition_enabled)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_texture *sdst = (struct si_texture *)dst->texture;

   /* Fast path that just clears DCC. */
   bool cleared = si_try_normal_clear(sctx, dst, dstx, dsty, width, height,
                                      render_condition_enabled, PIPE_CLEAR_COLOR0, color, 0, 0);
   if (cleared)
      return;

   /* The compute path is taken for non-MSAA surfaces, and before GFX10 only
    * when DCC isn't enabled for the level.
    */
   const bool use_compute =
      dst->texture->nr_samples <= 1 &&
      (sctx->gfx_level >= GFX10 || !vi_dcc_enabled(sdst, dst->u.tex.level));

   if (use_compute) {
      si_compute_clear_render_target(ctx, dst, color, dstx, dsty, width, height,
                                     render_condition_enabled);
      return;
   }

   /* Blitter fallback. */
   unsigned blitter_op = SI_CLEAR_SURFACE;
   if (!render_condition_enabled)
      blitter_op |= SI_DISABLE_RENDER_COND;

   si_blitter_begin(sctx, blitter_op);
   util_blitter_clear_render_target(sctx->blitter, dst, color, dstx, dsty, width, height);
   si_blitter_end(sctx);
}
1285 
/**
 * pipe_context::clear_depth_stencil implementation.
 *
 * Uses the framebuffer-based clear (si_try_normal_clear) when it applies and
 * falls back to the blitter otherwise.
 */
static void si_clear_depth_stencil(struct pipe_context *ctx, struct pipe_surface *dst,
                                   unsigned clear_flags, double depth, unsigned stencil,
                                   unsigned dstx, unsigned dsty, unsigned width, unsigned height,
                                   bool render_condition_enabled)
{
   struct si_context *sctx = (struct si_context *)ctx;
   /* Zero-initialized color passed to si_try_normal_clear for this depth/stencil clear. */
   union pipe_color_union dummy_color = {};

   /* Fast path that just clears HTILE. */
   bool cleared = si_try_normal_clear(sctx, dst, dstx, dsty, width, height,
                                      render_condition_enabled, clear_flags, &dummy_color,
                                      depth, stencil);
   if (cleared)
      return;

   /* Blitter fallback. */
   unsigned blitter_op = SI_CLEAR_SURFACE;
   if (!render_condition_enabled)
      blitter_op |= SI_DISABLE_RENDER_COND;

   si_blitter_begin(sctx, blitter_op);
   util_blitter_clear_depth_stencil(sctx->blitter, dst, clear_flags, depth, stencil, dstx, dsty,
                                    width, height);
   si_blitter_end(sctx);
}
1305 
si_init_clear_functions(struct si_context * sctx)1306 void si_init_clear_functions(struct si_context *sctx)
1307 {
1308    sctx->b.clear_render_target = si_clear_render_target;
1309    sctx->b.clear_texture = u_default_clear_texture;
1310 
1311    if (sctx->has_graphics) {
1312       sctx->b.clear = si_clear;
1313       sctx->b.clear_depth_stencil = si_clear_depth_stencil;
1314    }
1315 }
1316