• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3  * Copyright 2018 Advanced Micro Devices, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * on the rights to use, copy, modify, merge, publish, distribute, sub
10  * license, and/or sell copies of the Software, and to permit persons to whom
11  * the Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23  * USE OR OTHER DEALINGS IN THE SOFTWARE.
24  */
25 
26 #include "drm-uapi/drm_fourcc.h"
27 #include "si_pipe.h"
28 #include "si_query.h"
29 #include "sid.h"
30 #include "frontend/drm_driver.h"
31 #include "util/format/u_format.h"
32 #include "util/os_time.h"
33 #include "util/u_log.h"
34 #include "util/u_memory.h"
35 #include "util/u_pack_color.h"
36 #include "util/u_resource.h"
37 #include "util/u_surface.h"
38 #include "util/u_transfer.h"
39 
40 #include <errno.h>
41 #include <inttypes.h>
42 
43 #include "amd/addrlib/inc/addrinterface.h"
44 
45 static enum radeon_surf_mode si_choose_tiling(struct si_screen *sscreen,
46                                               const struct pipe_resource *templ,
47                                               bool tc_compatible_htile);
48 
49 static bool si_texture_is_aux_plane(const struct pipe_resource *resource);
50 
51 /* Same as resource_copy_region, except that both upsampling and downsampling are allowed. */
si_copy_region_with_blit(struct pipe_context * pipe,struct pipe_resource * dst,unsigned dst_level,unsigned dst_sample,unsigned dstx,unsigned dsty,unsigned dstz,struct pipe_resource * src,unsigned src_level,const struct pipe_box * src_box)52 static void si_copy_region_with_blit(struct pipe_context *pipe, struct pipe_resource *dst,
53                                      unsigned dst_level, unsigned dst_sample, unsigned dstx, unsigned dsty,
54                                      unsigned dstz, struct pipe_resource *src, unsigned src_level,
55                                      const struct pipe_box *src_box)
56 {
57    struct pipe_blit_info blit;
58 
59    memset(&blit, 0, sizeof(blit));
60    blit.src.resource = src;
61    blit.src.format = src->format;
62    blit.src.level = src_level;
63    blit.src.box = *src_box;
64    blit.dst.resource = dst;
65    blit.dst.format = dst->format;
66    blit.dst.level = dst_level;
67    blit.dst.box.x = dstx;
68    blit.dst.box.y = dsty;
69    blit.dst.box.z = dstz;
70    blit.dst.box.width = src_box->width;
71    blit.dst.box.height = src_box->height;
72    blit.dst.box.depth = src_box->depth;
73    blit.mask = util_format_get_mask(dst->format);
74    blit.filter = PIPE_TEX_FILTER_NEAREST;
75    blit.dst_sample = dst_sample;
76 
77    if (blit.mask) {
78       /* Only the gfx blit handles dst_sample. */
79       if (dst_sample)
80          si_gfx_blit(pipe, &blit);
81       else
82          pipe->blit(pipe, &blit);
83    }
84 }
85 
86 /* Copy from a full GPU texture to a transfer's staging one. */
si_copy_to_staging_texture(struct pipe_context * ctx,struct si_transfer * stransfer)87 static void si_copy_to_staging_texture(struct pipe_context *ctx, struct si_transfer *stransfer)
88 {
89    struct pipe_transfer *transfer = (struct pipe_transfer *)stransfer;
90    struct pipe_resource *dst = &stransfer->staging->b.b;
91    struct pipe_resource *src = transfer->resource;
92    /* level means sample_index - 1 with MSAA. Used by texture uploads. */
93    unsigned src_level = src->nr_samples > 1 ? 0 : transfer->level;
94 
95    if (src->nr_samples > 1 || ((struct si_texture *)src)->is_depth) {
96       si_copy_region_with_blit(ctx, dst, 0, 0, 0, 0, 0, src, src_level, &transfer->box);
97       return;
98    }
99 
100    si_resource_copy_region(ctx, dst, 0, 0, 0, 0, src, src_level, &transfer->box);
101 }
102 
103 /* Copy from a transfer's staging texture to a full GPU one. */
si_copy_from_staging_texture(struct pipe_context * ctx,struct si_transfer * stransfer)104 static void si_copy_from_staging_texture(struct pipe_context *ctx, struct si_transfer *stransfer)
105 {
106    struct pipe_transfer *transfer = (struct pipe_transfer *)stransfer;
107    struct pipe_resource *dst = transfer->resource;
108    struct pipe_resource *src = &stransfer->staging->b.b;
109    struct pipe_box sbox;
110 
111    u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, transfer->box.depth, &sbox);
112 
113    if (dst->nr_samples > 1 || ((struct si_texture *)dst)->is_depth) {
114       unsigned dst_level = dst->nr_samples > 1 ? 0 : transfer->level;
115       unsigned dst_sample = dst->nr_samples > 1 ? transfer->level : 0;
116 
117       si_copy_region_with_blit(ctx, dst, dst_level, dst_sample, transfer->box.x, transfer->box.y,
118                                transfer->box.z, src, 0, &sbox);
119       return;
120    }
121 
122    if (util_format_is_compressed(dst->format)) {
123       sbox.width = util_format_get_nblocksx(dst->format, sbox.width);
124       sbox.height = util_format_get_nblocksx(dst->format, sbox.height);
125    }
126 
127    si_resource_copy_region(ctx, dst, transfer->level, transfer->box.x, transfer->box.y,
128                            transfer->box.z, src, 0, &sbox);
129 }
130 
/* Return the byte offset of (level, box) within the texture's buffer and
 * write the row stride and layer stride (in bytes) into *stride and
 * *layer_stride for CPU mapping.
 *
 * If box is NULL, only the strides are set; the return value is then the
 * level's base offset on GFX6-8 and 0 on GFX9+.
 *
 * NOTE(review): the return type is unsigned while the legacy path computes
 * 64-bit expressions; presumably only textures whose mapped offsets fit in
 * 32 bits reach this path -- TODO confirm.
 */
static unsigned si_texture_get_offset(struct si_screen *sscreen, struct si_texture *tex,
                                      unsigned level, const struct pipe_box *box, unsigned *stride,
                                      unsigned *layer_stride)
{
   if (sscreen->info.gfx_level >= GFX9) {
      unsigned pitch;
      /* Linear surfaces have a per-level pitch; tiled ones share one pitch. */
      if (tex->surface.is_linear) {
         pitch = tex->surface.u.gfx9.pitch[level];
      } else {
         pitch = tex->surface.u.gfx9.surf_pitch;
      }

      *stride = pitch * tex->surface.bpe;
      *layer_stride = tex->surface.u.gfx9.surf_slice_size;

      if (!box)
         return 0;

      /* Each texture is an array of slices. Each slice is an array
       * of mipmap levels. */
      return tex->surface.u.gfx9.surf_offset + box->z * tex->surface.u.gfx9.surf_slice_size +
             tex->surface.u.gfx9.offset[level] +
             (box->y / tex->surface.blk_h * pitch + box->x / tex->surface.blk_w) *
             tex->surface.bpe;
   } else {
      *stride = tex->surface.u.legacy.level[level].nblk_x * tex->surface.bpe;
      /* slice_size is stored in dwords; the byte size must fit in 32 bits
       * because *layer_stride is unsigned. */
      assert((uint64_t)tex->surface.u.legacy.level[level].slice_size_dw * 4 <= UINT_MAX);
      *layer_stride = (uint64_t)tex->surface.u.legacy.level[level].slice_size_dw * 4;

      if (!box)
         return (uint64_t)tex->surface.u.legacy.level[level].offset_256B * 256;

      /* Each texture is an array of mipmap levels. Each level is
       * an array of slices. */
      return (uint64_t)tex->surface.u.legacy.level[level].offset_256B * 256 +
             box->z * (uint64_t)tex->surface.u.legacy.level[level].slice_size_dw * 4 +
             (box->y / tex->surface.blk_h * tex->surface.u.legacy.level[level].nblk_x +
              box->x / tex->surface.blk_w) *
                tex->surface.bpe;
   }
}
172 
/* Translate the gallium resource template and radeonsi-specific options into
 * RADEON_SURF_* flags and let the winsys compute the surface layout.
 *
 * \param array_mode          requested tiling mode
 * \param modifier            DRM format modifier, or DRM_FORMAT_MOD_INVALID
 *                            when the layout is driver-chosen
 * \param is_imported         surface comes from another process/device
 * \param is_scanout          surface will be scanned out by the display engine
 * \param is_flushed_depth    this is the color-flushed copy of a depth surface
 * \param tc_compatible_htile allocate TC-compatible HTILE if possible
 * \return 0 on success, a winsys error code otherwise.
 */
static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surface,
                           const struct pipe_resource *ptex, enum radeon_surf_mode array_mode,
                           uint64_t modifier, bool is_imported, bool is_scanout,
                           bool is_flushed_depth, bool tc_compatible_htile)
{
   const struct util_format_description *desc = util_format_description(ptex->format);
   bool is_depth, is_stencil;
   int r;
   unsigned bpe;
   uint64_t flags = 0;

   is_depth = util_format_has_depth(desc);
   is_stencil = util_format_has_stencil(desc);

   if (!is_flushed_depth && ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
      bpe = 4; /* stencil is allocated separately */
   } else {
      bpe = util_format_get_blocksize(ptex->format);
      assert(util_is_power_of_two_or_zero(bpe));
   }

   /* Depth/stencil flags, including HTILE selection. */
   if (!is_flushed_depth && is_depth) {
      flags |= RADEON_SURF_ZBUFFER;

      if ((sscreen->debug_flags & DBG(NO_HYPERZ)) ||
          (ptex->bind & PIPE_BIND_SHARED) || is_imported) {
         flags |= RADEON_SURF_NO_HTILE;
      } else if (tc_compatible_htile &&
                 (sscreen->info.gfx_level >= GFX9 || array_mode == RADEON_SURF_MODE_2D)) {
         /* TC-compatible HTILE only supports Z32_FLOAT.
          * GFX9 also supports Z16_UNORM.
          * On GFX8, promote Z16 to Z32. DB->CB copies will convert
          * the format for transfers.
          */
         if (sscreen->info.gfx_level == GFX8)
            bpe = 4;

         flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
      }

      if (is_stencil)
         flags |= RADEON_SURF_SBUFFER;
   }

   /* Disable DCC? (it can't be disabled if modifiers are used) */
   if (sscreen->info.gfx_level >= GFX8 && modifier == DRM_FORMAT_MOD_INVALID && !is_imported) {
      /* Global options that disable DCC. */
      if (ptex->flags & SI_RESOURCE_FLAG_DISABLE_DCC)
         flags |= RADEON_SURF_DISABLE_DCC;

      if (ptex->nr_samples >= 2 && sscreen->debug_flags & DBG(NO_DCC_MSAA))
         flags |= RADEON_SURF_DISABLE_DCC;

      /* Shared textures must always set up DCC. If it's not present, it will be disabled by
       * si_get_opaque_metadata later.
       */
      if (!is_imported &&
          (sscreen->debug_flags & DBG(NO_DCC) ||
           (ptex->bind & PIPE_BIND_SCANOUT && sscreen->debug_flags & DBG(NO_DISPLAY_DCC))))
         flags |= RADEON_SURF_DISABLE_DCC;

      /* R9G9B9E5 isn't supported for rendering by older generations. */
      if (sscreen->info.gfx_level < GFX10_3 &&
          ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT)
         flags |= RADEON_SURF_DISABLE_DCC;

      /* Per-generation DCC workarounds. */
      switch (sscreen->info.gfx_level) {
      case GFX8:
         /* Stoney: 128bpp MSAA textures randomly fail piglit tests with DCC. */
         if (sscreen->info.family == CHIP_STONEY && bpe == 16 && ptex->nr_samples >= 2)
            flags |= RADEON_SURF_DISABLE_DCC;

         /* DCC clear for 4x and 8x MSAA array textures unimplemented. */
         if (ptex->nr_storage_samples >= 4 && ptex->array_size > 1)
            flags |= RADEON_SURF_DISABLE_DCC;
         break;

      case GFX9:
         /* DCC MSAA fails this on Raven:
          *    https://www.khronos.org/registry/webgl/sdk/tests/deqp/functional/gles3/fbomultisample.2_samples.html
          * and this on Picasso:
          *    https://www.khronos.org/registry/webgl/sdk/tests/deqp/functional/gles3/fbomultisample.4_samples.html
          */
         if (sscreen->info.family == CHIP_RAVEN && ptex->nr_storage_samples >= 2 && bpe < 4)
            flags |= RADEON_SURF_DISABLE_DCC;
         break;

      case GFX10:
      case GFX10_3:
         if (ptex->nr_storage_samples >= 2 && !sscreen->options.dcc_msaa)
            flags |= RADEON_SURF_DISABLE_DCC;
         break;

      case GFX11:
         break;

      default:
         assert(0);
      }
   }

   if (is_scanout) {
      /* This should catch bugs in gallium users setting incorrect flags. */
      assert(ptex->nr_samples <= 1 && ptex->array_size == 1 && ptex->depth0 == 1 &&
             ptex->last_level == 0 && !(flags & RADEON_SURF_Z_OR_SBUFFER));

      flags |= RADEON_SURF_SCANOUT;
   }

   if (ptex->bind & PIPE_BIND_SHARED)
      flags |= RADEON_SURF_SHAREABLE;
   if (is_imported)
      flags |= RADEON_SURF_IMPORTED | RADEON_SURF_SHAREABLE;
   if (sscreen->debug_flags & DBG(NO_FMASK))
      flags |= RADEON_SURF_NO_FMASK;

   /* Honor an explicitly requested micro tile mode (GFX9 only). */
   if (sscreen->info.gfx_level == GFX9 && (ptex->flags & SI_RESOURCE_FLAG_FORCE_MICRO_TILE_MODE)) {
      flags |= RADEON_SURF_FORCE_MICRO_TILE_MODE;
      surface->micro_tile_mode = SI_RESOURCE_FLAG_MICRO_TILE_MODE_GET(ptex->flags);
   }

   if (ptex->flags & SI_RESOURCE_FLAG_FORCE_MSAA_TILING) {
      /* GFX11 shouldn't get here because the flag is only used by the CB MSAA resolving
       * that GFX11 doesn't have.
       */
      assert(sscreen->info.gfx_level <= GFX10_3);

      flags |= RADEON_SURF_FORCE_SWIZZLE_MODE;

      if (sscreen->info.gfx_level >= GFX10)
         surface->u.gfx9.swizzle_mode = ADDR_SW_64KB_R_X;
   }

   /* Sparse (PRT) resources can't use any metadata surfaces. */
   if (ptex->flags & PIPE_RESOURCE_FLAG_SPARSE) {
      flags |=
         RADEON_SURF_PRT |
         RADEON_SURF_NO_FMASK |
         RADEON_SURF_NO_HTILE |
         RADEON_SURF_DISABLE_DCC;
   }

   surface->modifier = modifier;

   r = sscreen->ws->surface_init(sscreen->ws, ptex, flags, bpe, array_mode, surface);
   if (r) {
      return r;
   }

   return 0;
}
323 
/* Run fast-clear elimination on the texture via flush_resource. If any
 * decompression actually happened, flush the context and report it through
 * *ctx_flushed (which may be NULL). */
void si_eliminate_fast_color_clear(struct si_context *sctx, struct si_texture *tex,
                                   bool *ctx_flushed)
{
   struct pipe_context *ctx = &sctx->b;
   unsigned calls_before = sctx->num_decompress_calls;

   ctx->flush_resource(ctx, &tex->buffer.b.b);

   /* Flush only if any fast clear elimination took place. */
   bool did_flush = false;
   if (calls_before != sctx->num_decompress_calls) {
      ctx->flush(ctx, NULL, 0);
      did_flush = true;
   }

   if (ctx_flushed)
      *ctx_flushed = did_flush;
}
342 
si_texture_discard_cmask(struct si_screen * sscreen,struct si_texture * tex)343 void si_texture_discard_cmask(struct si_screen *sscreen, struct si_texture *tex)
344 {
345    if (!tex->cmask_buffer)
346       return;
347 
348    assert(tex->buffer.b.b.nr_samples <= 1);
349 
350    /* Disable CMASK. */
351    tex->cmask_base_address_reg = tex->buffer.gpu_address >> 8;
352    tex->dirty_level_mask = 0;
353 
354    tex->cb_color_info &= ~S_028C70_FAST_CLEAR(1);
355 
356    if (tex->cmask_buffer != &tex->buffer)
357       si_resource_reference(&tex->cmask_buffer, NULL);
358 
359    tex->cmask_buffer = NULL;
360 
361    /* Notify all contexts about the change. */
362    p_atomic_inc(&sscreen->dirty_tex_counter);
363    p_atomic_inc(&sscreen->compressed_colortex_counter);
364 }
365 
si_can_disable_dcc(struct si_texture * tex)366 static bool si_can_disable_dcc(struct si_texture *tex)
367 {
368    /* We can't disable DCC if it can be written by another process. */
369    return !tex->is_depth &&
370           tex->surface.meta_offset &&
371           (!tex->buffer.b.is_shared ||
372            !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE)) &&
373           !ac_modifier_has_dcc(tex->surface.modifier);
374 }
375 
si_texture_discard_dcc(struct si_screen * sscreen,struct si_texture * tex)376 static bool si_texture_discard_dcc(struct si_screen *sscreen, struct si_texture *tex)
377 {
378    if (!si_can_disable_dcc(tex))
379       return false;
380 
381    /* Disable DCC. */
382    ac_surface_zero_dcc_fields(&tex->surface);
383 
384    /* Notify all contexts about the change. */
385    p_atomic_inc(&sscreen->dirty_tex_counter);
386    return true;
387 }
388 
389 /**
390  * Disable DCC for the texture. (first decompress, then discard metadata).
391  *
392  * There is unresolved multi-context synchronization issue between
393  * screen::aux_context and the current context. If applications do this with
394  * multiple contexts, it's already undefined behavior for them and we don't
395  * have to worry about that. The scenario is:
396  *
397  * If context 1 disables DCC and context 2 has queued commands that write
398  * to the texture via CB with DCC enabled, and the order of operations is
399  * as follows:
400  *   context 2 queues draw calls rendering to the texture, but doesn't flush
401  *   context 1 disables DCC and flushes
402  *   context 1 & 2 reset descriptors and FB state
403  *   context 2 flushes (new compressed tiles written by the draw calls)
404  *   context 1 & 2 read garbage, because DCC is disabled, yet there are
405  *   compressed tiles
406  *
407  * \param sctx  the current context if you have one, or sscreen->aux_context
408  *              if you don't.
409  */
si_texture_disable_dcc(struct si_context * sctx,struct si_texture * tex)410 bool si_texture_disable_dcc(struct si_context *sctx, struct si_texture *tex)
411 {
412    struct si_screen *sscreen = sctx->screen;
413 
414    if (!sctx->has_graphics)
415       return si_texture_discard_dcc(sscreen, tex);
416 
417    if (!si_can_disable_dcc(tex))
418       return false;
419 
420    /* Decompress DCC. */
421    si_decompress_dcc(sctx, tex);
422    sctx->b.flush(&sctx->b, NULL, 0);
423 
424    return si_texture_discard_dcc(sscreen, tex);
425 }
426 
/* Reallocate the texture with an additional bind flag by creating a new
 * texture, optionally copying the contents over, and transplanting the new
 * texture's buffer and state into the existing si_texture so that existing
 * pipe_resource pointers stay valid.
 *
 * \param new_bind_flag      bind flag to add (e.g. PIPE_BIND_SHARED or
 *                           PIPE_BIND_LINEAR)
 * \param invalidate_storage if true, the old contents are not copied
 *
 * Silently returns without doing anything if reallocation isn't possible
 * (shared or multi-plane textures, or a linear layout can't be used).
 */
static void si_reallocate_texture_inplace(struct si_context *sctx, struct si_texture *tex,
                                          unsigned new_bind_flag, bool invalidate_storage)
{
   struct pipe_screen *screen = sctx->b.screen;
   struct si_texture *new_tex;
   struct pipe_resource templ = tex->buffer.b.b;
   unsigned i;

   templ.bind |= new_bind_flag;

   /* Shared and multi-plane textures can't be swapped out from under users. */
   if (tex->buffer.b.is_shared || tex->num_planes > 1)
      return;

   if (new_bind_flag == PIPE_BIND_LINEAR) {
      if (tex->surface.is_linear)
         return;

      /* This fails with MSAA, depth, and compressed textures. */
      if (si_choose_tiling(sctx->screen, &templ, false) != RADEON_SURF_MODE_LINEAR_ALIGNED)
         return;
   }

   new_tex = (struct si_texture *)screen->resource_create(screen, &templ);
   if (!new_tex)
      return;

   /* Copy the pixels to the new texture. */
   if (!invalidate_storage) {
      for (i = 0; i <= templ.last_level; i++) {
         struct pipe_box box;

         u_box_3d(0, 0, 0, u_minify(templ.width0, i), u_minify(templ.height0, i),
                  util_num_layers(&templ, i), &box);

         si_resource_copy_region(&sctx->b, &new_tex->buffer.b.b,
                                 i, 0, 0, 0, &tex->buffer.b.b, i, &box);
      }
   }

   /* Linear textures carry no CMASK/DCC metadata. */
   if (new_bind_flag == PIPE_BIND_LINEAR) {
      si_texture_discard_cmask(sctx->screen, tex);
      si_texture_discard_dcc(sctx->screen, tex);
   }

   /* Replace the structure fields of tex. */
   tex->buffer.b.b.bind = templ.bind;
   radeon_bo_reference(sctx->screen->ws, &tex->buffer.buf, new_tex->buffer.buf);
   tex->buffer.gpu_address = new_tex->buffer.gpu_address;
   tex->buffer.memory_usage_kb = new_tex->buffer.memory_usage_kb;
   tex->buffer.bo_size = new_tex->buffer.bo_size;
   tex->buffer.bo_alignment_log2 = new_tex->buffer.bo_alignment_log2;
   tex->buffer.domains = new_tex->buffer.domains;
   tex->buffer.flags = new_tex->buffer.flags;

   tex->surface = new_tex->surface;
   si_texture_reference(&tex->flushed_depth_texture, new_tex->flushed_depth_texture);

   tex->surface.fmask_offset = new_tex->surface.fmask_offset;
   tex->surface.cmask_offset = new_tex->surface.cmask_offset;
   tex->cmask_base_address_reg = new_tex->cmask_base_address_reg;

   /* Drop the old CMASK buffer (which may alias the texture's own buffer)
    * and take over the new texture's CMASK. */
   if (tex->cmask_buffer == &tex->buffer)
      tex->cmask_buffer = NULL;
   else
      si_resource_reference(&tex->cmask_buffer, NULL);

   if (new_tex->cmask_buffer == &new_tex->buffer)
      tex->cmask_buffer = &tex->buffer;
   else
      si_resource_reference(&tex->cmask_buffer, new_tex->cmask_buffer);

   tex->surface.meta_offset = new_tex->surface.meta_offset;
   tex->cb_color_info = new_tex->cb_color_info;
   memcpy(tex->color_clear_value, new_tex->color_clear_value, sizeof(tex->color_clear_value));
   tex->last_msaa_resolve_target_micro_mode = new_tex->last_msaa_resolve_target_micro_mode;

   memcpy(tex->depth_clear_value, new_tex->depth_clear_value, sizeof(tex->depth_clear_value));
   tex->dirty_level_mask = new_tex->dirty_level_mask;
   tex->stencil_dirty_level_mask = new_tex->stencil_dirty_level_mask;
   tex->db_render_format = new_tex->db_render_format;
   memcpy(tex->stencil_clear_value, new_tex->stencil_clear_value, sizeof(tex->stencil_clear_value));
   tex->tc_compatible_htile = new_tex->tc_compatible_htile;
   tex->depth_cleared_level_mask_once = new_tex->depth_cleared_level_mask_once;
   tex->stencil_cleared_level_mask_once = new_tex->stencil_cleared_level_mask_once;
   tex->upgraded_depth = new_tex->upgraded_depth;
   tex->db_compatible = new_tex->db_compatible;
   tex->can_sample_z = new_tex->can_sample_z;
   tex->can_sample_s = new_tex->can_sample_s;

   tex->displayable_dcc_dirty = new_tex->displayable_dcc_dirty;

   /* A linear texture must have ended up without any metadata surfaces. */
   if (new_bind_flag == PIPE_BIND_LINEAR) {
      assert(!tex->surface.meta_offset);
      assert(!tex->cmask_buffer);
      assert(!tex->surface.fmask_size);
      assert(!tex->is_depth);
   }

   si_texture_reference(&new_tex, NULL);

   p_atomic_inc(&sctx->screen->dirty_tex_counter);
}
529 
si_set_tex_bo_metadata(struct si_screen * sscreen,struct si_texture * tex)530 static void si_set_tex_bo_metadata(struct si_screen *sscreen, struct si_texture *tex)
531 {
532    struct pipe_resource *res = &tex->buffer.b.b;
533    struct radeon_bo_metadata md;
534 
535    memset(&md, 0, sizeof(md));
536 
537    assert(tex->surface.fmask_size == 0);
538 
539    static const unsigned char swizzle[] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
540                                            PIPE_SWIZZLE_W};
541    bool is_array = util_texture_is_array(res->target);
542    uint32_t desc[8];
543 
544    sscreen->make_texture_descriptor(sscreen, tex, true, res->target, res->format, swizzle, 0,
545                                     res->last_level, 0, is_array ? res->array_size - 1 : 0,
546                                     res->width0, res->height0, res->depth0, desc, NULL);
547    si_set_mutable_tex_desc_fields(sscreen, tex, &tex->surface.u.legacy.level[0], 0, 0,
548                                   tex->surface.blk_w, false, 0, desc);
549 
550    ac_surface_get_umd_metadata(&sscreen->info, &tex->surface,
551                                tex->buffer.b.b.last_level + 1,
552                                desc, &md.size_metadata, md.metadata);
553    sscreen->ws->buffer_set_metadata(sscreen->ws, tex->buffer.buf, &md, &tex->surface);
554 }
555 
si_displayable_dcc_needs_explicit_flush(struct si_texture * tex)556 static bool si_displayable_dcc_needs_explicit_flush(struct si_texture *tex)
557 {
558    struct si_screen *sscreen = (struct si_screen *)tex->buffer.b.b.screen;
559 
560    if (sscreen->info.gfx_level <= GFX8)
561       return false;
562 
563    /* With modifiers and > 1 planes any applications will know that they
564     * cannot do frontbuffer rendering with the texture. */
565    if (ac_surface_get_nplanes(&tex->surface) > 1)
566       return false;
567 
568    return tex->surface.is_displayable && tex->surface.meta_offset;
569 }
570 
si_resource_get_param(struct pipe_screen * screen,struct pipe_context * context,struct pipe_resource * resource,unsigned plane,unsigned layer,unsigned level,enum pipe_resource_param param,unsigned handle_usage,uint64_t * value)571 static bool si_resource_get_param(struct pipe_screen *screen, struct pipe_context *context,
572                                   struct pipe_resource *resource, unsigned plane, unsigned layer,
573                                   unsigned level,
574                                   enum pipe_resource_param param, unsigned handle_usage,
575                                   uint64_t *value)
576 {
577    while (plane && resource->next && !si_texture_is_aux_plane(resource->next)) {
578       --plane;
579       resource = resource->next;
580    }
581 
582    struct si_screen *sscreen = (struct si_screen *)screen;
583    struct si_texture *tex = (struct si_texture *)resource;
584    struct winsys_handle whandle;
585 
586    switch (param) {
587    case PIPE_RESOURCE_PARAM_NPLANES:
588       if (resource->target == PIPE_BUFFER)
589          *value = 1;
590       else if (tex->num_planes > 1)
591          *value = tex->num_planes;
592       else
593          *value = ac_surface_get_nplanes(&tex->surface);
594       return true;
595 
596    case PIPE_RESOURCE_PARAM_STRIDE:
597       if (resource->target == PIPE_BUFFER)
598          *value = 0;
599       else
600          *value = ac_surface_get_plane_stride(sscreen->info.gfx_level,
601                                               &tex->surface, plane, level);
602       return true;
603 
604    case PIPE_RESOURCE_PARAM_OFFSET:
605       if (resource->target == PIPE_BUFFER) {
606          *value = 0;
607       } else {
608          uint64_t level_offset = tex->surface.is_linear ? tex->surface.u.gfx9.offset[level] : 0;
609          *value = ac_surface_get_plane_offset(sscreen->info.gfx_level,
610                                               &tex->surface, plane, layer)  + level_offset;
611       }
612       return true;
613 
614    case PIPE_RESOURCE_PARAM_MODIFIER:
615       *value = tex->surface.modifier;
616       return true;
617 
618    case PIPE_RESOURCE_PARAM_HANDLE_TYPE_SHARED:
619    case PIPE_RESOURCE_PARAM_HANDLE_TYPE_KMS:
620    case PIPE_RESOURCE_PARAM_HANDLE_TYPE_FD:
621       memset(&whandle, 0, sizeof(whandle));
622 
623       if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_SHARED)
624          whandle.type = WINSYS_HANDLE_TYPE_SHARED;
625       else if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_KMS)
626          whandle.type = WINSYS_HANDLE_TYPE_KMS;
627       else if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_FD)
628          whandle.type = WINSYS_HANDLE_TYPE_FD;
629 
630       if (!screen->resource_get_handle(screen, context, resource, &whandle, handle_usage))
631          return false;
632 
633       *value = whandle.handle;
634       return true;
635    case PIPE_RESOURCE_PARAM_LAYER_STRIDE:
636       break;
637    }
638    return false;
639 }
640 
si_texture_get_info(struct pipe_screen * screen,struct pipe_resource * resource,unsigned * pstride,unsigned * poffset)641 static void si_texture_get_info(struct pipe_screen *screen, struct pipe_resource *resource,
642                                 unsigned *pstride, unsigned *poffset)
643 {
644    uint64_t value;
645 
646    if (pstride) {
647       si_resource_get_param(screen, NULL, resource, 0, 0, 0, PIPE_RESOURCE_PARAM_STRIDE, 0, &value);
648       *pstride = value;
649    }
650 
651    if (poffset) {
652       si_resource_get_param(screen, NULL, resource, 0, 0, 0, PIPE_RESOURCE_PARAM_OFFSET, 0, &value);
653       *poffset = value;
654    }
655 }
656 
si_texture_get_handle(struct pipe_screen * screen,struct pipe_context * ctx,struct pipe_resource * resource,struct winsys_handle * whandle,unsigned usage)657 static bool si_texture_get_handle(struct pipe_screen *screen, struct pipe_context *ctx,
658                                   struct pipe_resource *resource, struct winsys_handle *whandle,
659                                   unsigned usage)
660 {
661    struct si_screen *sscreen = (struct si_screen *)screen;
662    struct si_context *sctx;
663    struct si_resource *res = si_resource(resource);
664    struct si_texture *tex = (struct si_texture *)resource;
665    bool update_metadata = false;
666    unsigned stride, offset, slice_size;
667    uint64_t modifier = DRM_FORMAT_MOD_INVALID;
668    bool flush = false;
669 
670    ctx = threaded_context_unwrap_sync(ctx);
671    sctx = ctx ? (struct si_context *)ctx : si_get_aux_context(sscreen);
672 
673    if (resource->target != PIPE_BUFFER) {
674       unsigned plane = whandle->plane;
675 
676       /* Individual planes are chained pipe_resource instances. */
677       while (plane && resource->next && !si_texture_is_aux_plane(resource->next)) {
678          resource = resource->next;
679          --plane;
680       }
681 
682       res = si_resource(resource);
683       tex = (struct si_texture *)resource;
684 
685       /* This is not supported now, but it might be required for OpenCL
686        * interop in the future.
687        */
688       if (resource->nr_samples > 1 || tex->is_depth) {
689          if (!ctx)
690             si_put_aux_context_flush(sscreen);
691          return false;
692       }
693 
694       whandle->size = tex->buffer.bo_size;
695 
696       if (plane) {
697          if (!ctx)
698             si_put_aux_context_flush(sscreen);
699          whandle->offset = ac_surface_get_plane_offset(sscreen->info.gfx_level,
700                                                        &tex->surface, plane, 0);
701          whandle->stride = ac_surface_get_plane_stride(sscreen->info.gfx_level,
702                                                        &tex->surface, plane, 0);
703          whandle->modifier = tex->surface.modifier;
704          return sscreen->ws->buffer_get_handle(sscreen->ws, res->buf, whandle);
705       }
706 
707       /* Move a suballocated texture into a non-suballocated allocation. */
708       if (sscreen->ws->buffer_is_suballocated(res->buf) || tex->surface.tile_swizzle ||
709           (tex->buffer.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
710            sscreen->info.has_local_buffers)) {
711          assert(!res->b.is_shared);
712          si_reallocate_texture_inplace(sctx, tex, PIPE_BIND_SHARED, false);
713          flush = true;
714          assert(res->b.b.bind & PIPE_BIND_SHARED);
715          assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
716          assert(!(res->flags & RADEON_FLAG_NO_INTERPROCESS_SHARING));
717          assert(tex->surface.tile_swizzle == 0);
718       }
719 
720       /* Since shader image stores don't support DCC on GFX8,
721        * disable it for external clients that want write
722        * access.
723        */
724       if (sscreen->debug_flags & DBG(NO_EXPORTED_DCC) ||
725           (usage & PIPE_HANDLE_USAGE_SHADER_WRITE && !tex->is_depth && tex->surface.meta_offset) ||
726           /* Displayable DCC requires an explicit flush. */
727           (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
728            si_displayable_dcc_needs_explicit_flush(tex))) {
729          if (si_texture_disable_dcc(sctx, tex)) {
730             update_metadata = true;
731             /* si_texture_disable_dcc flushes the context */
732             flush = false;
733          }
734       }
735 
736       if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
737           (tex->cmask_buffer || (!tex->is_depth && tex->surface.meta_offset))) {
738          /* Eliminate fast clear (both CMASK and DCC) */
739          bool flushed;
740          si_eliminate_fast_color_clear(sctx, tex, &flushed);
741          /* eliminate_fast_color_clear sometimes flushes the context */
742          flush = !flushed;
743 
744          /* Disable CMASK if flush_resource isn't going
745           * to be called.
746           */
747          if (tex->cmask_buffer)
748             si_texture_discard_cmask(sscreen, tex);
749       }
750 
751       /* Set metadata. */
752       if ((!res->b.is_shared || update_metadata) && whandle->offset == 0)
753          si_set_tex_bo_metadata(sscreen, tex);
754 
755       if (sscreen->info.gfx_level >= GFX9) {
756          slice_size = tex->surface.u.gfx9.surf_slice_size;
757       } else {
758          slice_size = (uint64_t)tex->surface.u.legacy.level[0].slice_size_dw * 4;
759       }
760 
761       modifier = tex->surface.modifier;
762    } else {
763       tc_buffer_disable_cpu_storage(&res->b.b);
764 
765       /* Buffer exports are for the OpenCL interop. */
766       /* Move a suballocated buffer into a non-suballocated allocation. */
767       if (sscreen->ws->buffer_is_suballocated(res->buf) ||
768           /* A DMABUF export always fails if the BO is local. */
769           (tex->buffer.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
770            sscreen->info.has_local_buffers)) {
771          assert(!res->b.is_shared);
772 
773          /* Allocate a new buffer with PIPE_BIND_SHARED. */
774          struct pipe_resource templ = res->b.b;
775          templ.bind |= PIPE_BIND_SHARED;
776 
777          struct pipe_resource *newb = screen->resource_create(screen, &templ);
778          if (!newb) {
779             if (!ctx)
780                si_put_aux_context_flush(sscreen);
781             return false;
782          }
783 
784          /* Copy the old buffer contents to the new one. */
785          struct pipe_box box;
786          u_box_1d(0, newb->width0, &box);
787          sctx->b.resource_copy_region(&sctx->b, newb, 0, 0, 0, 0, &res->b.b, 0, &box);
788          flush = true;
789          /* Move the new buffer storage to the old pipe_resource. */
790          si_replace_buffer_storage(&sctx->b, &res->b.b, newb, 0, 0, 0);
791          pipe_resource_reference(&newb, NULL);
792 
793          assert(res->b.b.bind & PIPE_BIND_SHARED);
794          assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
795       }
796 
797       /* Buffers */
798       slice_size = 0;
799    }
800 
801    si_texture_get_info(screen, resource, &stride, &offset);
802 
803    if (res->b.is_shared) {
804       /* USAGE_EXPLICIT_FLUSH must be cleared if at least one user
805        * doesn't set it.
806        */
807       res->external_usage |= usage & ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
808       if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
809          res->external_usage &= ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
810    } else {
811       res->b.is_shared = true;
812       res->external_usage = usage;
813    }
814 
815    if (flush && ctx)
816       sctx->b.flush(&sctx->b, NULL, 0);
817    if (!ctx)
818       si_put_aux_context_flush(sscreen);
819 
820    whandle->stride = stride;
821    whandle->offset = offset + slice_size * whandle->layer;
822    whandle->modifier = modifier;
823 
824    return sscreen->ws->buffer_get_handle(sscreen->ws, res->buf, whandle);
825 }
826 
/**
 * Dump the layout of \p tex (dimensions, TC-compatible-HTILE state, surface
 * info, and on pre-GFX9 the per-level DCC/mip/stencil layout) to \p log.
 * Used for debugging (e.g. the DBG(TEX) path in si_texture_create_object).
 */
void si_print_texture_info(struct si_screen *sscreen, struct si_texture *tex,
                           struct u_log_context *log)
{
   int i;
   FILE *f;
   char *surf_info = NULL;
   size_t surf_info_size;

   /* Common parameters. */
   u_log_printf(log,
                "  Info: npix_x=%u, npix_y=%u, npix_z=%u, "
                "array_size=%u, last_level=%u, nsamples=%u",
                tex->buffer.b.b.width0, tex->buffer.b.b.height0,
                tex->buffer.b.b.depth0, tex->buffer.b.b.array_size,
                tex->buffer.b.b.last_level, tex->buffer.b.b.nr_samples);

   /* A non-zero meta_offset on a depth texture means HTILE is allocated. */
   if (tex->is_depth && tex->surface.meta_offset)
      u_log_printf(log, ", tc_compatible_htile=%u", tex->tc_compatible_htile);

   u_log_printf(log, ", %s\n",
                util_format_short_name(tex->buffer.b.b.format));

   /* ac_surface_print_info writes to a FILE*; capture its output in memory
    * and forward it to the log. Bail out silently if the stream can't be
    * created — this is debug-only output. */
   f = open_memstream(&surf_info, &surf_info_size);
   if (!f)
      return;
   ac_surface_print_info(f, &sscreen->info, &tex->surface);
   fclose(f);
   u_log_printf(log, "%s", surf_info);
   free(surf_info);

   /* Everything below dumps the legacy (pre-GFX9) per-level layout only. */
   if (sscreen->info.gfx_level >= GFX9) {
      return;
   }

   /* A non-zero meta_offset on a color texture means DCC is allocated. */
   if (!tex->is_depth && tex->surface.meta_offset) {
      for (i = 0; i <= tex->buffer.b.b.last_level; i++)
         u_log_printf(log,
                      "    DCCLevel[%i]: enabled=%u, offset=%u, "
                      "fast_clear_size=%u\n",
                      i, i < tex->surface.num_meta_levels, tex->surface.u.legacy.color.dcc_level[i].dcc_offset,
                      tex->surface.u.legacy.color.dcc_level[i].dcc_fast_clear_size);
   }

   /* Per-mip-level layout (offsets stored in 256B units, sizes in dwords). */
   for (i = 0; i <= tex->buffer.b.b.last_level; i++)
      u_log_printf(log,
                   "    Level[%i]: offset=%" PRIu64 ", slice_size=%" PRIu64 ", "
                   "npix_x=%u, npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
                   "mode=%u, tiling_index = %u\n",
                   i, (uint64_t)tex->surface.u.legacy.level[i].offset_256B * 256,
                   (uint64_t)tex->surface.u.legacy.level[i].slice_size_dw * 4,
                   u_minify(tex->buffer.b.b.width0, i), u_minify(tex->buffer.b.b.height0, i),
                   u_minify(tex->buffer.b.b.depth0, i), tex->surface.u.legacy.level[i].nblk_x,
                   tex->surface.u.legacy.level[i].nblk_y, tex->surface.u.legacy.level[i].mode,
                   tex->surface.u.legacy.tiling_index[i]);

   /* Same dump for the separate stencil mip chain, if present. */
   if (tex->surface.has_stencil) {
      for (i = 0; i <= tex->buffer.b.b.last_level; i++) {
         u_log_printf(log,
                      "    StencilLevel[%i]: offset=%" PRIu64 ", "
                      "slice_size=%" PRIu64 ", npix_x=%u, "
                      "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
                      "mode=%u, tiling_index = %u\n",
                      i, (uint64_t)tex->surface.u.legacy.zs.stencil_level[i].offset_256B * 256,
                      (uint64_t)tex->surface.u.legacy.zs.stencil_level[i].slice_size_dw * 4,
                      u_minify(tex->buffer.b.b.width0, i), u_minify(tex->buffer.b.b.height0, i),
                      u_minify(tex->buffer.b.b.depth0, i),
                      tex->surface.u.legacy.zs.stencil_level[i].nblk_x,
                      tex->surface.u.legacy.zs.stencil_level[i].nblk_y,
                      tex->surface.u.legacy.zs.stencil_level[i].mode,
                      tex->surface.u.legacy.zs.stencil_tiling_index[i]);
      }
   }
}
900 
901 /**
902  * Common function for si_texture_create and si_texture_from_handle.
903  *
904  * \param screen       screen
905  * \param base         resource template
906  * \param surface      radeon_surf
907  * \param plane0       if a non-zero plane is being created, this is the first plane
908  * \param imported_buf from si_texture_from_handle
909  * \param offset       offset for non-zero planes or imported buffers
910  * \param alloc_size   the size to allocate if plane0 != NULL
911  * \param alignment    alignment for the allocation
912  */
si_texture_create_object(struct pipe_screen * screen,const struct pipe_resource * base,const struct radeon_surf * surface,const struct si_texture * plane0,struct pb_buffer * imported_buf,uint64_t offset,unsigned pitch_in_bytes,uint64_t alloc_size,unsigned alignment)913 static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
914                                                    const struct pipe_resource *base,
915                                                    const struct radeon_surf *surface,
916                                                    const struct si_texture *plane0,
917                                                    struct pb_buffer *imported_buf,
918                                                    uint64_t offset, unsigned pitch_in_bytes,
919                                                    uint64_t alloc_size, unsigned alignment)
920 {
921    struct si_texture *tex;
922    struct si_resource *resource;
923    struct si_screen *sscreen = (struct si_screen *)screen;
924 
925    if (!sscreen->info.has_3d_cube_border_color_mipmap &&
926        (base->last_level > 0 ||
927         base->target == PIPE_TEXTURE_3D ||
928         base->target == PIPE_TEXTURE_CUBE)) {
929       assert(0);
930       return NULL;
931    }
932 
933    tex = CALLOC_STRUCT_CL(si_texture);
934    if (!tex)
935       goto error;
936 
937    resource = &tex->buffer;
938    resource->b.b = *base;
939    pipe_reference_init(&resource->b.b.reference, 1);
940    resource->b.b.screen = screen;
941 
942    /* don't include stencil-only formats which we don't support for rendering */
943    tex->is_depth = util_format_has_depth(util_format_description(tex->buffer.b.b.format));
944    tex->surface = *surface;
945 
946    /* Use 1.0 as the default clear value to get optimal ZRANGE_PRECISION if we don't
947     * get a fast clear.
948     */
949    for (unsigned i = 0; i < ARRAY_SIZE(tex->depth_clear_value); i++)
950       tex->depth_clear_value[i] = 1.0;
951 
952    /* On GFX8, HTILE uses different tiling depending on the TC_COMPATIBLE_HTILE
953     * setting, so we have to enable it if we enabled it at allocation.
954     *
955     * GFX9 and later use the same tiling for both, so TC-compatible HTILE can be
956     * enabled on demand.
957     */
958    tex->tc_compatible_htile = (sscreen->info.gfx_level == GFX8 &&
959                                tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) ||
960                               /* Mipmapping always starts TC-compatible. */
961                               (sscreen->info.gfx_level >= GFX8 &&
962                                tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE &&
963                                tex->buffer.b.b.last_level > 0);
964 
965    /* TC-compatible HTILE:
966     * - GFX8 only supports Z32_FLOAT.
967     * - GFX9 only supports Z32_FLOAT and Z16_UNORM. */
968    if (tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) {
969       if (sscreen->info.gfx_level >= GFX9 && base->format == PIPE_FORMAT_Z16_UNORM)
970          tex->db_render_format = base->format;
971       else {
972          tex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
973          tex->upgraded_depth = base->format != PIPE_FORMAT_Z32_FLOAT &&
974                                base->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT;
975       }
976    } else {
977       tex->db_render_format = base->format;
978    }
979 
980    /* Applies to GCN. */
981    tex->last_msaa_resolve_target_micro_mode = tex->surface.micro_tile_mode;
982 
983    if (!ac_surface_override_offset_stride(&sscreen->info, &tex->surface,
984                                      tex->buffer.b.b.last_level + 1,
985                                           offset, pitch_in_bytes / tex->surface.bpe))
986       goto error;
987 
988    if (tex->is_depth) {
989       tex->htile_stencil_disabled = !tex->surface.has_stencil;
990 
991       if (sscreen->info.gfx_level >= GFX9) {
992          tex->can_sample_z = true;
993          tex->can_sample_s = true;
994 
995          /* Stencil texturing with HTILE doesn't work
996           * with mipmapping on Navi10-14. */
997          if (sscreen->info.gfx_level == GFX10 && base->last_level > 0)
998             tex->htile_stencil_disabled = true;
999       } else {
1000          tex->can_sample_z = !tex->surface.u.legacy.depth_adjusted;
1001          tex->can_sample_s = !tex->surface.u.legacy.stencil_adjusted;
1002 
1003          /* GFX8 must keep stencil enabled because it can't use Z-only TC-compatible
1004           * HTILE because of a hw bug. This has only a small effect on performance
1005           * because we lose a little bit of Z precision in order to make space for
1006           * stencil in HTILE.
1007           */
1008          if (sscreen->info.gfx_level == GFX8 &&
1009              tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE)
1010             tex->htile_stencil_disabled = false;
1011       }
1012 
1013       tex->db_compatible = surface->flags & RADEON_SURF_ZBUFFER;
1014    } else {
1015       if (tex->surface.cmask_offset) {
1016          assert(sscreen->info.gfx_level < GFX11);
1017          tex->cb_color_info |= S_028C70_FAST_CLEAR(1);
1018          tex->cmask_buffer = &tex->buffer;
1019       }
1020    }
1021 
1022    if (plane0) {
1023       /* The buffer is shared with the first plane. */
1024       resource->bo_size = plane0->buffer.bo_size;
1025       resource->bo_alignment_log2 = plane0->buffer.bo_alignment_log2;
1026       resource->flags = plane0->buffer.flags;
1027       resource->domains = plane0->buffer.domains;
1028       resource->memory_usage_kb = plane0->buffer.memory_usage_kb;
1029 
1030       radeon_bo_reference(sscreen->ws, &resource->buf, plane0->buffer.buf);
1031       resource->gpu_address = plane0->buffer.gpu_address;
1032    } else if (!(surface->flags & RADEON_SURF_IMPORTED)) {
1033       if (base->flags & PIPE_RESOURCE_FLAG_SPARSE)
1034          resource->b.b.flags |= PIPE_RESOURCE_FLAG_UNMAPPABLE;
1035       if (base->bind & PIPE_BIND_PRIME_BLIT_DST)
1036          resource->b.b.flags |= SI_RESOURCE_FLAG_GL2_BYPASS;
1037 
1038       /* Create the backing buffer. */
1039       si_init_resource_fields(sscreen, resource, alloc_size, alignment);
1040 
1041       if (!si_alloc_resource(sscreen, resource))
1042          goto error;
1043    } else {
1044       resource->buf = imported_buf;
1045       resource->gpu_address = sscreen->ws->buffer_get_virtual_address(resource->buf);
1046       resource->bo_size = imported_buf->size;
1047       resource->bo_alignment_log2 = imported_buf->alignment_log2;
1048       resource->domains = sscreen->ws->buffer_get_initial_domain(resource->buf);
1049       resource->memory_usage_kb = MAX2(1, resource->bo_size / 1024);
1050       if (sscreen->ws->buffer_get_flags)
1051          resource->flags = sscreen->ws->buffer_get_flags(resource->buf);
1052    }
1053 
1054    /* Prepare metadata clears.  */
1055    struct si_clear_info clears[4];
1056    unsigned num_clears = 0;
1057 
1058    if (tex->cmask_buffer) {
1059       /* Initialize the cmask to 0xCC (= compressed state). */
1060       assert(num_clears < ARRAY_SIZE(clears));
1061       si_init_buffer_clear(&clears[num_clears++], &tex->cmask_buffer->b.b,
1062                            tex->surface.cmask_offset, tex->surface.cmask_size,
1063                            0xCCCCCCCC);
1064    }
1065    if (tex->is_depth && tex->surface.meta_offset) {
1066       uint32_t clear_value = 0;
1067 
1068       if (sscreen->info.gfx_level >= GFX9 || tex->tc_compatible_htile)
1069          clear_value = 0x0000030F;
1070 
1071       assert(num_clears < ARRAY_SIZE(clears));
1072       si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset,
1073                            tex->surface.meta_size, clear_value);
1074    }
1075 
1076    /* Initialize DCC only if the texture is not being imported. */
1077    if (!(surface->flags & RADEON_SURF_IMPORTED) && !tex->is_depth && tex->surface.meta_offset) {
1078       /* Clear DCC to black for all tiles with DCC enabled.
1079        *
1080        * This fixes corruption in 3DMark Slingshot Extreme, which
1081        * uses uninitialized textures, causing corruption.
1082        */
1083       if (tex->surface.num_meta_levels == tex->buffer.b.b.last_level + 1 &&
1084           tex->buffer.b.b.nr_samples <= 2) {
1085          /* Simple case - all tiles have DCC enabled. */
1086          assert(num_clears < ARRAY_SIZE(clears));
1087          si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset,
1088                               tex->surface.meta_size, DCC_CLEAR_0000);
1089       } else if (sscreen->info.gfx_level >= GFX9) {
1090          /* Clear to uncompressed. Clearing this to black is complicated. */
1091          assert(num_clears < ARRAY_SIZE(clears));
1092          si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset,
1093                               tex->surface.meta_size, DCC_UNCOMPRESSED);
1094       } else {
1095          /* GFX8: Initialize mipmap levels and multisamples separately. */
1096          if (tex->buffer.b.b.nr_samples >= 2) {
1097             /* Clearing this to black is complicated. */
1098             assert(num_clears < ARRAY_SIZE(clears));
1099             si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset,
1100                                  tex->surface.meta_size, DCC_UNCOMPRESSED);
1101          } else {
1102             /* Clear the enabled mipmap levels to black. */
1103             unsigned size = 0;
1104 
1105             for (unsigned i = 0; i < tex->surface.num_meta_levels; i++) {
1106                if (!tex->surface.u.legacy.color.dcc_level[i].dcc_fast_clear_size)
1107                   break;
1108 
1109                size = tex->surface.u.legacy.color.dcc_level[i].dcc_offset +
1110                       tex->surface.u.legacy.color.dcc_level[i].dcc_fast_clear_size;
1111             }
1112 
1113             /* Mipmap levels with DCC. */
1114             if (size) {
1115                assert(num_clears < ARRAY_SIZE(clears));
1116                si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset, size,
1117                                     DCC_CLEAR_0000);
1118             }
1119             /* Mipmap levels without DCC. */
1120             if (size != tex->surface.meta_size) {
1121                assert(num_clears < ARRAY_SIZE(clears));
1122                si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset + size,
1123                                     tex->surface.meta_size - size, DCC_UNCOMPRESSED);
1124             }
1125          }
1126       }
1127    }
1128 
1129    /* Initialize displayable DCC that requires the retile blit. */
1130    if (tex->surface.display_dcc_offset && !(surface->flags & RADEON_SURF_IMPORTED)) {
1131       /* Uninitialized DCC can hang the display hw.
1132        * Clear to white to indicate that. */
1133       assert(num_clears < ARRAY_SIZE(clears));
1134       si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.display_dcc_offset,
1135                            tex->surface.u.gfx9.color.display_dcc_size,
1136                            sscreen->info.gfx_level >= GFX11 ? GFX11_DCC_CLEAR_1111_UNORM
1137                                                              : GFX8_DCC_CLEAR_1111);
1138    }
1139 
1140    /* Execute the clears. */
1141    if (num_clears) {
1142       si_execute_clears(si_get_aux_context(sscreen), clears, num_clears, 0);
1143       si_put_aux_context_flush(sscreen);
1144    }
1145 
1146    /* Initialize the CMASK base register value. */
1147    tex->cmask_base_address_reg = (tex->buffer.gpu_address + tex->surface.cmask_offset) >> 8;
1148 
1149    if (sscreen->debug_flags & DBG(VM)) {
1150       fprintf(stderr,
1151               "VM start=0x%" PRIX64 "  end=0x%" PRIX64
1152               " | Texture %ix%ix%i, %i levels, %i samples, %s\n",
1153               tex->buffer.gpu_address, tex->buffer.gpu_address + tex->buffer.buf->size,
1154               base->width0, base->height0, util_num_layers(base, 0), base->last_level + 1,
1155               base->nr_samples ? base->nr_samples : 1, util_format_short_name(base->format));
1156    }
1157 
1158    if (sscreen->debug_flags & DBG(TEX)) {
1159       puts("Texture:");
1160       struct u_log_context log;
1161       u_log_context_init(&log);
1162       si_print_texture_info(sscreen, tex, &log);
1163       u_log_new_page_print(&log, stdout);
1164       fflush(stdout);
1165       u_log_context_destroy(&log);
1166    }
1167 
1168    return tex;
1169 
1170 error:
1171    FREE_CL(tex);
1172    return NULL;
1173 }
1174 
/**
 * Pick the surface tiling mode (linear / 1D / 2D) for a resource template.
 * Priority order matters: MSAA and forced-linear overrides come first, then
 * the linear-candidate heuristics, then the small-texture 1D fallback.
 */
static enum radeon_surf_mode si_choose_tiling(struct si_screen *sscreen,
                                              const struct pipe_resource *templ,
                                              bool tc_compatible_htile)
{
   const struct util_format_description *fmt_desc = util_format_description(templ->format);
   const bool must_tile = (templ->flags & SI_RESOURCE_FLAG_FORCE_MSAA_TILING) != 0;
   const bool depth_or_stencil = util_format_is_depth_or_stencil(templ->format) &&
                                 !(templ->flags & SI_RESOURCE_FLAG_FLUSHED_DEPTH);

   /* MSAA resources must be 2D tiled. */
   if (templ->nr_samples > 1)
      return RADEON_SURF_MODE_2D;

   /* Transfer resources should be linear. */
   if (templ->flags & SI_RESOURCE_FLAG_FORCE_LINEAR)
      return RADEON_SURF_MODE_LINEAR_ALIGNED;

   /* Avoid Z/S decompress blits by forcing TC-compatible HTILE on GFX8,
    * which requires 2D tiling.
    */
   if (tc_compatible_htile && sscreen->info.gfx_level == GFX8)
      return RADEON_SURF_MODE_2D;

   /* Handle common candidates for the linear mode.
    * Compressed textures and DB surfaces must always be tiled.
    */
   if (!must_tile && !depth_or_stencil && !util_format_is_compressed(templ->format)) {
      const bool want_linear =
         /* Debug overrides that disable (display) tiling. */
         (sscreen->debug_flags & DBG(NO_TILING)) ||
         ((templ->bind & PIPE_BIND_SCANOUT) &&
          (sscreen->debug_flags & DBG(NO_DISPLAY_TILING))) ||
         /* Tiling doesn't work with the 422 (SUBSAMPLED) formats. */
         fmt_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED ||
         /* Cursors are linear on AMD GCN.
          * (XXX double-check, maybe also use RADEON_SURF_SCANOUT)
          * Explicit linear binds are linear by definition. */
         (templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR)) != 0 ||
         /* Textures with a very small height are recommended to be linear;
          * only very thin and long 2D textures benefit from linear_aligned. */
         templ->target == PIPE_TEXTURE_1D ||
         templ->target == PIPE_TEXTURE_1D_ARRAY ||
         templ->height0 <= 2 ||
         /* Textures likely to be mapped often. */
         templ->usage == PIPE_USAGE_STAGING ||
         templ->usage == PIPE_USAGE_STREAM;

      if (want_linear)
         return RADEON_SURF_MODE_LINEAR_ALIGNED;
   }

   /* Make small textures 1D tiled. */
   if ((sscreen->debug_flags & DBG(NO_2D_TILING)) ||
       templ->width0 <= 16 || templ->height0 <= 16)
      return RADEON_SURF_MODE_1D;

   /* The allocator will switch to 1D if needed. */
   return RADEON_SURF_MODE_2D;
}
1237 
/**
 * Create a texture with an explicit DRM format modifier (or
 * DRM_FORMAT_MOD_INVALID for driver-chosen layout). Handles multi-plane
 * formats like NV12 by computing all plane layouts up front, allocating one
 * buffer for plane 0 and chaining the other planes onto it via b.b.next.
 */
static struct pipe_resource *
si_texture_create_with_modifier(struct pipe_screen *screen,
                                const struct pipe_resource *templ,
                                uint64_t modifier)
{
   struct si_screen *sscreen = (struct si_screen *)screen;
   bool is_zs = util_format_is_depth_or_stencil(templ->format);

   if (templ->nr_samples >= 2) {
      /* This is hackish (overwriting the const pipe_resource template),
       * but should be harmless and gallium frontends can also see
       * the overridden number of samples in the created pipe_resource.
       */
      if (is_zs && sscreen->eqaa_force_z_samples) {
         ((struct pipe_resource *)templ)->nr_samples =
            ((struct pipe_resource *)templ)->nr_storage_samples = sscreen->eqaa_force_z_samples;
      } else if (!is_zs && sscreen->eqaa_force_color_samples) {
         ((struct pipe_resource *)templ)->nr_samples = sscreen->eqaa_force_coverage_samples;
         ((struct pipe_resource *)templ)->nr_storage_samples = sscreen->eqaa_force_color_samples;
      }
   }

   bool is_flushed_depth = templ->flags & SI_RESOURCE_FLAG_FLUSHED_DEPTH ||
                           templ->flags & SI_RESOURCE_FLAG_FORCE_LINEAR;
   bool tc_compatible_htile =
      sscreen->info.gfx_level >= GFX8 &&
      /* There are issues with TC-compatible HTILE on Tonga (and
       * Iceland is the same design), and documented bug workarounds
       * don't help. For example, this fails:
       *   piglit/bin/tex-miplevel-selection 'texture()' 2DShadow -auto
       */
      sscreen->info.family != CHIP_TONGA && sscreen->info.family != CHIP_ICELAND &&
      (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) &&
      !(sscreen->debug_flags & DBG(NO_HYPERZ)) && !is_flushed_depth &&
      is_zs;
   enum radeon_surf_mode tile_mode = si_choose_tiling(sscreen, templ, tc_compatible_htile);

   /* This allocates textures with multiple planes like NV12 in 1 buffer. */
   enum
   {
      SI_TEXTURE_MAX_PLANES = 3
   };
   struct radeon_surf surface[SI_TEXTURE_MAX_PLANES] = {};
   struct pipe_resource plane_templ[SI_TEXTURE_MAX_PLANES];
   uint64_t plane_offset[SI_TEXTURE_MAX_PLANES] = {};
   uint64_t total_size = 0;
   unsigned max_alignment = 0;
   unsigned num_planes = util_format_get_num_planes(templ->format);
   assert(num_planes <= SI_TEXTURE_MAX_PLANES);

   /* Compute texture or plane layouts and offsets. */
   for (unsigned i = 0; i < num_planes; i++) {
      plane_templ[i] = *templ;
      plane_templ[i].format = util_format_get_plane_format(templ->format, i);
      plane_templ[i].width0 = util_format_get_plane_width(templ->format, i, templ->width0);
      plane_templ[i].height0 = util_format_get_plane_height(templ->format, i, templ->height0);

      /* Multi-plane allocations need PIPE_BIND_SHARED, because we can't
       * reallocate the storage to add PIPE_BIND_SHARED, because it's
       * shared by 3 pipe_resources.
       */
      if (num_planes > 1)
         plane_templ[i].bind |= PIPE_BIND_SHARED;

      /* si_init_surface returns non-zero on failure. */
      if (si_init_surface(sscreen, &surface[i], &plane_templ[i], tile_mode, modifier,
                          false, plane_templ[i].bind & PIPE_BIND_SCANOUT,
                          is_flushed_depth, tc_compatible_htile))
         return NULL;

      plane_templ[i].nr_sparse_levels = surface[i].first_mip_tail_level;

      /* Pack each plane after the previous one at its required alignment. */
      plane_offset[i] = align64(total_size, 1 << surface[i].surf_alignment_log2);
      total_size = plane_offset[i] + surface[i].total_size;
      max_alignment = MAX2(max_alignment, 1 << surface[i].surf_alignment_log2);
   }

   struct si_texture *plane0 = NULL, *last_plane = NULL;

   /* Create one si_texture per plane; plane 0 owns the buffer, the rest
    * reference it (see si_texture_create_object's plane0 path). */
   for (unsigned i = 0; i < num_planes; i++) {
      struct si_texture *tex =
         si_texture_create_object(screen, &plane_templ[i], &surface[i], plane0, NULL,
                                  plane_offset[i], 0, total_size, max_alignment);
      if (!tex) {
         /* Releasing plane0 also releases already-chained planes. */
         si_texture_reference(&plane0, NULL);
         return NULL;
      }

      tex->plane_index = i;
      tex->num_planes = num_planes;

      if (!plane0) {
         plane0 = last_plane = tex;
      } else {
         last_plane->buffer.b.b.next = &tex->buffer.b.b;
         last_plane = tex;
      }
   }

   return (struct pipe_resource *)plane0;
}
1338 
/**
 * Gallium resource_create entry point for textures: creates the texture
 * with no explicit DRM format modifier (DRM_FORMAT_MOD_INVALID lets the
 * driver pick the layout).
 */
struct pipe_resource *si_texture_create(struct pipe_screen *screen,
                                        const struct pipe_resource *templ)
{
   return si_texture_create_with_modifier(screen, templ, DRM_FORMAT_MOD_INVALID);
}
1344 
/**
 * Commit or release sparse (PRT) backing pages for the tile-aligned region
 * covering \p box of mip \p level of a sparse texture.
 *
 * \param ctx    context (provides the winsys; GFX9+ only)
 * \param res    the sparse texture
 * \param level  mip level being (de)committed
 * \param box    region in texels/layers; rounded out to whole PRT tiles
 * \param commit true to back the pages with memory, false to release them
 * \return false if the winsys buffer_commit call fails
 */
bool si_texture_commit(struct si_context *ctx, struct si_resource *res, unsigned level,
                       struct pipe_box *box, bool commit)
{
   struct si_texture *tex = (struct si_texture *)res;
   struct radeon_surf *surface = &tex->surface;
   enum pipe_format format = res->b.b.format;
   unsigned blks = util_format_get_blocksize(format);
   unsigned samples = MAX2(1, res->b.b.nr_samples);

   /* This path relies on the gfx9 PRT layout fields below. */
   assert(ctx->gfx_level >= GFX9);

   /* Byte distance between consecutive rows of PRT tiles and between
    * consecutive tile-depth slabs (assumes prt_level_pitch is in blocks
    * per tile row — TODO confirm against ac_surface). */
   unsigned row_pitch = surface->u.gfx9.prt_level_pitch[level] *
      surface->prt_tile_height * surface->prt_tile_depth * blks * samples;
   unsigned depth_pitch = surface->u.gfx9.surf_slice_size * surface->prt_tile_depth;

   /* Box origin in PRT tile coordinates. */
   unsigned x = box->x / surface->prt_tile_width;
   unsigned y = box->y / surface->prt_tile_height;
   unsigned z = box->z / surface->prt_tile_depth;

   /* Box extent in whole tiles (rounded up). */
   unsigned w = DIV_ROUND_UP(box->width, surface->prt_tile_width);
   unsigned h = DIV_ROUND_UP(box->height, surface->prt_tile_height);
   unsigned d = DIV_ROUND_UP(box->depth, surface->prt_tile_depth);

   /* Align to tile block base, for levels in mip tail whose offset is inside
    * a tile block.
    */
   unsigned level_base = ROUND_DOWN_TO(surface->u.gfx9.prt_level_offset[level],
                                       RADEON_SPARSE_PAGE_SIZE);
   unsigned commit_base = level_base +
      x * RADEON_SPARSE_PAGE_SIZE + y * row_pitch + z * depth_pitch;

   /* Each tile occupies one sparse page; a row of w tiles is contiguous,
    * so commit it with a single call per (slice, tile row). */
   unsigned size = w * RADEON_SPARSE_PAGE_SIZE;
   for (int i = 0; i < d; i++) {
      unsigned base = commit_base + i * depth_pitch;
      for (int j = 0; j < h; j++) {
         unsigned offset = base + j * row_pitch;
         if (!ctx->ws->buffer_commit(ctx->ws, res->buf, offset, size, commit))
            return false;
      }
   }

   return true;
}
1388 
/**
 * Gallium query_dmabuf_modifiers hook: report the DRM format modifiers
 * supported for \p format. With max == 0 only *count is written, letting
 * the caller size the arrays before a second call.
 */
static void si_query_dmabuf_modifiers(struct pipe_screen *screen,
                                      enum pipe_format format,
                                      int max,
                                      uint64_t *modifiers,
                                      unsigned int *external_only,
                                      int *count)
{
   struct si_screen *sscreen = (struct si_screen *)screen;

   unsigned ac_mod_count = max;
   ac_get_supported_modifiers(&sscreen->info, &(struct ac_modifier_options) {
         .dcc = !(sscreen->debug_flags & DBG(NO_DCC)),
         /* DCC-retile modifiers follow the same debug switch as DCC itself.
          * NOTE(review): a stale comment here claimed retiling was disabled
          * because modifier users can't promise explicit resource flushes,
          * but the code enables it — confirm the displayable-DCC flush
          * requirements are handled before relying on this. */
         .dcc_retile = !(sscreen->debug_flags & DBG(NO_DCC)),
      }, format, &ac_mod_count,  max ? modifiers : NULL);
   /* YUV formats are only usable as "external-only" (sampled via import). */
   if (max && external_only) {
      for (unsigned i = 0; i < ac_mod_count; ++i)
         external_only[i] = util_format_is_yuv(format);
   }
   *count = ac_mod_count;
}
1412 
1413 static bool
si_is_dmabuf_modifier_supported(struct pipe_screen * screen,uint64_t modifier,enum pipe_format format,bool * external_only)1414 si_is_dmabuf_modifier_supported(struct pipe_screen *screen,
1415                                uint64_t modifier,
1416                                enum pipe_format format,
1417                                bool *external_only)
1418 {
1419    int allowed_mod_count;
1420    si_query_dmabuf_modifiers(screen, format, 0, NULL, NULL, &allowed_mod_count);
1421 
1422    uint64_t *allowed_modifiers = (uint64_t *)calloc(allowed_mod_count, sizeof(uint64_t));
1423    if (!allowed_modifiers)
1424       return false;
1425 
1426    unsigned *external_array = NULL;
1427    if (external_only) {
1428       external_array = (unsigned *)calloc(allowed_mod_count, sizeof(unsigned));
1429       if (!external_array) {
1430          free(allowed_modifiers);
1431          return false;
1432       }
1433    }
1434 
1435    si_query_dmabuf_modifiers(screen, format, allowed_mod_count, allowed_modifiers,
1436                             external_array, &allowed_mod_count);
1437 
1438    bool supported = false;
1439    for (int i = 0; i < allowed_mod_count && !supported; ++i) {
1440       if (allowed_modifiers[i] != modifier)
1441          continue;
1442 
1443       supported = true;
1444       if (external_only)
1445          *external_only = external_array[i];
1446    }
1447 
1448    free(allowed_modifiers);
1449    free(external_array);
1450    return supported;
1451 }
1452 
1453 static unsigned
si_get_dmabuf_modifier_planes(struct pipe_screen * pscreen,uint64_t modifier,enum pipe_format format)1454 si_get_dmabuf_modifier_planes(struct pipe_screen *pscreen, uint64_t modifier,
1455                              enum pipe_format format)
1456 {
1457    unsigned planes = util_format_get_num_planes(format);
1458 
1459    if (IS_AMD_FMT_MOD(modifier) && planes == 1) {
1460       if (AMD_FMT_MOD_GET(DCC_RETILE, modifier))
1461          return 3;
1462       else if (AMD_FMT_MOD_GET(DCC, modifier))
1463          return 2;
1464       else
1465          return 1;
1466    }
1467 
1468    return planes;
1469 }
1470 
1471 static bool
si_modifier_supports_resource(struct pipe_screen * screen,uint64_t modifier,const struct pipe_resource * templ)1472 si_modifier_supports_resource(struct pipe_screen *screen,
1473                               uint64_t modifier,
1474                               const struct pipe_resource *templ)
1475 {
1476    struct si_screen *sscreen = (struct si_screen *)screen;
1477    uint32_t max_width, max_height;
1478 
1479    ac_modifier_max_extent(&sscreen->info, modifier, &max_width, &max_height);
1480    return templ->width0 <= max_width && templ->height0 <= max_height;
1481 }
1482 
1483 static struct pipe_resource *
si_texture_create_with_modifiers(struct pipe_screen * screen,const struct pipe_resource * templ,const uint64_t * modifiers,int modifier_count)1484 si_texture_create_with_modifiers(struct pipe_screen *screen,
1485                                  const struct pipe_resource *templ,
1486                                  const uint64_t *modifiers,
1487                                  int modifier_count)
1488 {
1489    /* Buffers with modifiers make zero sense. */
1490    assert(templ->target != PIPE_BUFFER);
1491 
1492    /* Select modifier. */
1493    int allowed_mod_count;
1494    si_query_dmabuf_modifiers(screen, templ->format, 0, NULL, NULL, &allowed_mod_count);
1495 
1496    uint64_t *allowed_modifiers = (uint64_t *)calloc(allowed_mod_count, sizeof(uint64_t));
1497    if (!allowed_modifiers) {
1498       return NULL;
1499    }
1500 
1501    /* This does not take external_only into account. We assume it is the same for all modifiers. */
1502    si_query_dmabuf_modifiers(screen, templ->format, allowed_mod_count, allowed_modifiers, NULL, &allowed_mod_count);
1503 
1504    uint64_t modifier = DRM_FORMAT_MOD_INVALID;
1505 
1506    /* Try to find the first allowed modifier that is in the application provided
1507     * list. We assume that the allowed modifiers are ordered in descending
1508     * preference in the list provided by si_query_dmabuf_modifiers. */
1509    for (int i = 0; i < allowed_mod_count; ++i) {
1510       bool found = false;
1511       for (int j = 0; j < modifier_count && !found; ++j)
1512          if (modifiers[j] == allowed_modifiers[i] && si_modifier_supports_resource(screen, modifiers[j], templ))
1513             found = true;
1514 
1515       if (found) {
1516          modifier = allowed_modifiers[i];
1517          break;
1518       }
1519    }
1520 
1521    free(allowed_modifiers);
1522 
1523    if (modifier == DRM_FORMAT_MOD_INVALID) {
1524       return NULL;
1525    }
1526    return si_texture_create_with_modifier(screen, templ, modifier);
1527 }
1528 
si_texture_is_aux_plane(const struct pipe_resource * resource)1529 static bool si_texture_is_aux_plane(const struct pipe_resource *resource)
1530 {
1531    return resource->flags & SI_RESOURCE_AUX_PLANE;
1532 }
1533 
/* Wrap an imported winsys buffer in a si_texture.
 *
 * "dedicated" means the BO metadata describes the image; it is ignored for
 * non-zero plane offsets. Returns NULL on any validation failure.
 * NOTE(review): failure paths drop the texture via si_texture_reference;
 * ownership of "buf" on those paths is assumed to follow — confirm against
 * callers.
 */
static struct pipe_resource *si_texture_from_winsys_buffer(struct si_screen *sscreen,
                                                           const struct pipe_resource *templ,
                                                           struct pb_buffer *buf, unsigned stride,
                                                           uint64_t offset, uint64_t modifier,
                                                           unsigned usage, bool dedicated)
{
   struct radeon_surf surface = {};
   struct radeon_bo_metadata metadata = {};
   struct si_texture *tex;
   int r;

   /* Ignore metadata for non-zero planes. */
   if (offset != 0)
      dedicated = false;

   if (dedicated) {
      sscreen->ws->buffer_get_metadata(sscreen->ws, buf, &metadata, &surface);
   } else {
      /**
       * The bo metadata is unset for un-dedicated images. So we fall
       * back to linear. See answer to question 5 of the
       * VK_KHX_external_memory spec for some details.
       *
       * It is possible that this case isn't going to work if the
       * surface pitch isn't correctly aligned by default.
       *
       * In order to support it correctly we require multi-image
       * metadata to be synchronized between radv and radeonsi. The
       * semantics of associating multiple image metadata to a memory
       * object on the vulkan export side are not concretely defined
       * either.
       *
       * All the use cases we are aware of at the moment for memory
       * objects use dedicated allocations. So let's keep the initial
       * implementation simple.
       *
       * A possible alternative is to attempt to reconstruct the
       * tiling information when the TexParameter TEXTURE_TILING_EXT
       * is set.
       */
      metadata.mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
   }

   r = si_init_surface(sscreen, &surface, templ, metadata.mode, modifier, true,
                       surface.flags & RADEON_SURF_SCANOUT, false, false);
   if (r)
      return NULL;

   tex = si_texture_create_object(&sscreen->b, templ, &surface, NULL, buf,
                                  offset, stride, 0, 0);
   if (!tex)
      return NULL;

   tex->buffer.b.is_shared = true;
   tex->buffer.external_usage = usage;
   tex->num_planes = 1;
   if (tex->buffer.flags & RADEON_FLAG_ENCRYPTED)
      tex->buffer.b.b.bind |= PIPE_BIND_PROTECTED;

   /* Account for multiple planes with lowered yuv import. */
   struct pipe_resource *next_plane = tex->buffer.b.b.next;
   while (next_plane && !si_texture_is_aux_plane(next_plane)) {
      struct si_texture *next_tex = (struct si_texture *)next_plane;
      ++next_tex->num_planes;
      ++tex->num_planes;
      next_plane = next_plane->next;
   }

   /* Validate any remaining aux planes: each must alias the same buffer
    * and match the offset/stride computed from the surface layout. */
   unsigned nplanes = ac_surface_get_nplanes(&tex->surface);
   unsigned plane = 1;
   while (next_plane) {
      struct si_auxiliary_texture *ptex = (struct si_auxiliary_texture *)next_plane;
      if (plane >= nplanes || ptex->buffer != tex->buffer.buf ||
          ptex->offset != ac_surface_get_plane_offset(sscreen->info.gfx_level,
                                                      &tex->surface, plane, 0) ||
          ptex->stride != ac_surface_get_plane_stride(sscreen->info.gfx_level,
                                                      &tex->surface, plane, 0)) {
         si_texture_reference(&tex, NULL);
         return NULL;
      }
      ++plane;
      next_plane = next_plane->next;
   }

   /* Reject imports that supplied fewer planes than the layout requires
    * (only when this isn't a multi-planar lowered YUV import). */
   if (plane != nplanes && tex->num_planes == 1) {
      si_texture_reference(&tex, NULL);
      return NULL;
   }

   /* Apply tiling/compression info from the UMD metadata blob; fails on
    * inconsistent metadata. */
   if (!ac_surface_set_umd_metadata(&sscreen->info, &tex->surface,
                                    tex->buffer.b.b.nr_storage_samples,
                                    tex->buffer.b.b.last_level + 1,
                                    metadata.size_metadata,
                                    metadata.metadata)) {
      si_texture_reference(&tex, NULL);
      return NULL;
   }

   /* The BO must be large enough and sufficiently aligned for the layout. */
   if (ac_surface_get_plane_offset(sscreen->info.gfx_level, &tex->surface, 0, 0) +
        tex->surface.total_size > buf->size ||
       buf->alignment_log2 < tex->surface.alignment_log2) {
      si_texture_reference(&tex, NULL);
      return NULL;
   }

   /* Displayable DCC requires an explicit flush. */
   if (dedicated && offset == 0 && !(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
       si_displayable_dcc_needs_explicit_flush(tex)) {
      /* TODO: do we need to decompress DCC? */
      if (si_texture_discard_dcc(sscreen, tex)) {
         /* Update BO metadata after disabling DCC. */
         si_set_tex_bo_metadata(sscreen, tex);
      }
   }

   assert(tex->surface.tile_swizzle == 0);
   return &tex->buffer.b.b;
}
1652 
si_texture_from_handle(struct pipe_screen * screen,const struct pipe_resource * templ,struct winsys_handle * whandle,unsigned usage)1653 static struct pipe_resource *si_texture_from_handle(struct pipe_screen *screen,
1654                                                     const struct pipe_resource *templ,
1655                                                     struct winsys_handle *whandle, unsigned usage)
1656 {
1657    struct si_screen *sscreen = (struct si_screen *)screen;
1658    struct pb_buffer *buf = NULL;
1659 
1660    /* Support only 2D textures without mipmaps */
1661    if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT &&
1662         templ->target != PIPE_TEXTURE_2D_ARRAY) ||
1663        templ->last_level != 0)
1664       return NULL;
1665 
1666    buf = sscreen->ws->buffer_from_handle(sscreen->ws, whandle,
1667                                          sscreen->info.max_alignment,
1668                                          templ->bind & PIPE_BIND_PRIME_BLIT_DST);
1669    if (!buf)
1670       return NULL;
1671 
1672    if (whandle->plane >= util_format_get_num_planes(whandle->format)) {
1673       struct si_auxiliary_texture *tex = CALLOC_STRUCT_CL(si_auxiliary_texture);
1674       if (!tex)
1675          return NULL;
1676       tex->b.b = *templ;
1677       tex->b.b.flags |= SI_RESOURCE_AUX_PLANE;
1678       tex->stride = whandle->stride;
1679       tex->offset = whandle->offset;
1680       tex->buffer = buf;
1681       pipe_reference_init(&tex->b.b.reference, 1);
1682       tex->b.b.screen = screen;
1683 
1684       return &tex->b.b;
1685    }
1686 
1687    return si_texture_from_winsys_buffer(sscreen, templ, buf, whandle->stride, whandle->offset,
1688                                         whandle->modifier, usage, true);
1689 }
1690 
si_init_flushed_depth_texture(struct pipe_context * ctx,struct pipe_resource * texture)1691 bool si_init_flushed_depth_texture(struct pipe_context *ctx, struct pipe_resource *texture)
1692 {
1693    struct si_texture *tex = (struct si_texture *)texture;
1694    struct pipe_resource resource;
1695    enum pipe_format pipe_format = texture->format;
1696 
1697    assert(!tex->flushed_depth_texture);
1698 
1699    if (!tex->can_sample_z && tex->can_sample_s) {
1700       switch (pipe_format) {
1701       case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1702          /* Save memory by not allocating the S plane. */
1703          pipe_format = PIPE_FORMAT_Z32_FLOAT;
1704          break;
1705       case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1706       case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1707          /* Save memory bandwidth by not copying the
1708           * stencil part during flush.
1709           *
1710           * This potentially increases memory bandwidth
1711           * if an application uses both Z and S texturing
1712           * simultaneously (a flushed Z24S8 texture
1713           * would be stored compactly), but how often
1714           * does that really happen?
1715           */
1716          pipe_format = PIPE_FORMAT_Z24X8_UNORM;
1717          break;
1718       default:;
1719       }
1720    } else if (!tex->can_sample_s && tex->can_sample_z) {
1721       assert(util_format_has_stencil(util_format_description(pipe_format)));
1722 
1723       /* DB->CB copies to an 8bpp surface don't work. */
1724       pipe_format = PIPE_FORMAT_X24S8_UINT;
1725    }
1726 
1727    memset(&resource, 0, sizeof(resource));
1728    resource.target = texture->target;
1729    resource.format = pipe_format;
1730    resource.width0 = texture->width0;
1731    resource.height0 = texture->height0;
1732    resource.depth0 = texture->depth0;
1733    resource.array_size = texture->array_size;
1734    resource.last_level = texture->last_level;
1735    resource.nr_samples = texture->nr_samples;
1736    resource.nr_storage_samples = texture->nr_storage_samples;
1737    resource.usage = PIPE_USAGE_DEFAULT;
1738    resource.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL;
1739    resource.flags = texture->flags | SI_RESOURCE_FLAG_FLUSHED_DEPTH;
1740 
1741    tex->flushed_depth_texture =
1742       (struct si_texture *)ctx->screen->resource_create(ctx->screen, &resource);
1743    if (!tex->flushed_depth_texture) {
1744       PRINT_ERR("failed to create temporary texture to hold flushed depth\n");
1745       return false;
1746    }
1747    return true;
1748 }
1749 
1750 /**
1751  * Initialize the pipe_resource descriptor to be of the same size as the box,
1752  * which is supposed to hold a subregion of the texture "orig" at the given
1753  * mipmap level.
1754  */
si_init_temp_resource_from_box(struct pipe_resource * res,struct pipe_resource * orig,const struct pipe_box * box,unsigned level,unsigned usage,unsigned flags)1755 static void si_init_temp_resource_from_box(struct pipe_resource *res, struct pipe_resource *orig,
1756                                            const struct pipe_box *box, unsigned level,
1757                                            unsigned usage, unsigned flags)
1758 {
1759    memset(res, 0, sizeof(*res));
1760    res->format = orig->format;
1761    res->width0 = box->width;
1762    res->height0 = box->height;
1763    res->depth0 = 1;
1764    res->array_size = 1;
1765    res->usage = usage;
1766    res->flags = flags;
1767 
1768    if (flags & SI_RESOURCE_FLAG_FORCE_LINEAR && util_format_is_compressed(orig->format)) {
1769       /* Transfer resources are allocated with linear tiling, which is
1770        * not supported for compressed formats.
1771        */
1772       unsigned blocksize = util_format_get_blocksize(orig->format);
1773 
1774       if (blocksize == 8) {
1775          res->format = PIPE_FORMAT_R16G16B16A16_UINT;
1776       } else {
1777          assert(blocksize == 16);
1778          res->format = PIPE_FORMAT_R32G32B32A32_UINT;
1779       }
1780 
1781       res->width0 = util_format_get_nblocksx(orig->format, box->width);
1782       res->height0 = util_format_get_nblocksy(orig->format, box->height);
1783    }
1784 
1785    /* We must set the correct texture target and dimensions for a 3D box. */
1786    if (box->depth > 1 && util_max_layer(orig, level) > 0) {
1787       res->target = PIPE_TEXTURE_2D_ARRAY;
1788       res->array_size = box->depth;
1789    } else {
1790       res->target = PIPE_TEXTURE_2D;
1791    }
1792 }
1793 
si_can_invalidate_texture(struct si_screen * sscreen,struct si_texture * tex,unsigned transfer_usage,const struct pipe_box * box)1794 static bool si_can_invalidate_texture(struct si_screen *sscreen, struct si_texture *tex,
1795                                       unsigned transfer_usage, const struct pipe_box *box)
1796 {
1797    return !tex->buffer.b.is_shared && !(tex->surface.flags & RADEON_SURF_IMPORTED) &&
1798           !(transfer_usage & PIPE_MAP_READ) && tex->buffer.b.b.last_level == 0 &&
1799           util_texrange_covers_whole_level(&tex->buffer.b.b, 0, box->x, box->y, box->z, box->width,
1800                                            box->height, box->depth);
1801 }
1802 
si_texture_invalidate_storage(struct si_context * sctx,struct si_texture * tex)1803 static void si_texture_invalidate_storage(struct si_context *sctx, struct si_texture *tex)
1804 {
1805    struct si_screen *sscreen = sctx->screen;
1806 
1807    /* There is no point in discarding depth and tiled buffers. */
1808    assert(!tex->is_depth);
1809    assert(tex->surface.is_linear);
1810 
1811    /* Reallocate the buffer in the same pipe_resource. */
1812    si_alloc_resource(sscreen, &tex->buffer);
1813 
1814    /* Initialize the CMASK base address (needed even without CMASK). */
1815    tex->cmask_base_address_reg = (tex->buffer.gpu_address + tex->surface.cmask_offset) >> 8;
1816 
1817    p_atomic_inc(&sscreen->dirty_tex_counter);
1818 
1819    sctx->num_alloc_tex_transfer_bytes += tex->surface.total_size;
1820 }
1821 
/* pipe_context::texture_map implementation.
 *
 * Maps either the texture directly or a linear staging texture, depending
 * on tiling, memory domain and encryption. Reads through a staging texture
 * are preceded by a GPU copy; writes are copied back at unmap time.
 * Returns a CPU pointer to the requested box, or NULL on failure.
 */
static void *si_texture_transfer_map(struct pipe_context *ctx, struct pipe_resource *texture,
                                     unsigned level, unsigned usage, const struct pipe_box *box,
                                     struct pipe_transfer **ptransfer)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_texture *tex = (struct si_texture *)texture;
   struct si_transfer *trans;
   struct si_resource *buf;
   unsigned offset = 0;
   char *map;
   /* Encrypted content can't be mapped directly. */
   bool use_staging_texture = tex->buffer.flags & RADEON_FLAG_ENCRYPTED;
   /* MSAA resources are only mapped at level 0. */
   unsigned real_level = texture->nr_samples > 1 ? 0 : level;

   assert(texture->target != PIPE_BUFFER);
   assert(!(texture->flags & SI_RESOURCE_FLAG_FORCE_LINEAR));
   assert(box->width && box->height && box->depth);

   /* Auxiliary metadata planes have no CPU-mappable image data. */
   if (tex->buffer.b.b.flags & SI_RESOURCE_AUX_PLANE)
      return NULL;

   /* Reading encrypted content back is not allowed. */
   if ((tex->buffer.flags & RADEON_FLAG_ENCRYPTED) && usage & PIPE_MAP_READ)
      return NULL;

   if (tex->is_depth || tex->buffer.flags & RADEON_FLAG_SPARSE) {
      /* Depth and sparse textures use staging unconditionally. */
      use_staging_texture = true;
   } else {
      /* Degrade the tile mode if we get too many transfers on APUs.
       * On dGPUs, the staging texture is always faster.
       * Only count uploads that are at least 4x4 pixels large.
       */
      if (!sctx->screen->info.has_dedicated_vram && real_level == 0 && box->width >= 4 &&
          box->height >= 4 && p_atomic_inc_return(&tex->num_level0_transfers) == 10) {
         bool can_invalidate = si_can_invalidate_texture(sctx->screen, tex, usage, box);

         si_reallocate_texture_inplace(sctx, tex, PIPE_BIND_LINEAR, can_invalidate);
      }

      /* Tiled textures need to be converted into a linear texture for CPU
       * access. The staging texture is always linear and is placed in GART.
       *
       * dGPU use a staging texture for VRAM, so that we don't map it and
       * don't relocate it to GTT.
       *
       * Reading from VRAM or GTT WC is slow, always use the staging
       * texture in this case.
       *
       * Use the staging texture for uploads if the underlying BO
       * is busy.
       */
      if (!tex->surface.is_linear || (tex->buffer.flags & RADEON_FLAG_ENCRYPTED) ||
          (tex->buffer.domains & RADEON_DOMAIN_VRAM && sctx->screen->info.has_dedicated_vram &&
           !sctx->screen->info.smart_access_memory))
         use_staging_texture = true;
      else if (usage & PIPE_MAP_READ)
         use_staging_texture =
            tex->buffer.domains & RADEON_DOMAIN_VRAM || tex->buffer.flags & RADEON_FLAG_GTT_WC;
      /* Write & linear only: */
      else if (si_cs_is_buffer_referenced(sctx, tex->buffer.buf, RADEON_USAGE_READWRITE) ||
               !sctx->ws->buffer_wait(sctx->ws, tex->buffer.buf, 0, RADEON_USAGE_READWRITE)) {
         /* It's busy. */
         if (si_can_invalidate_texture(sctx->screen, tex, usage, box))
            si_texture_invalidate_storage(sctx, tex);
         else
            use_staging_texture = true;
      }
   }

   trans = CALLOC_STRUCT(si_transfer);
   if (!trans)
      return NULL;
   pipe_resource_reference(&trans->b.b.resource, texture);
   trans->b.b.level = level;
   trans->b.b.usage = usage;
   trans->b.b.box = *box;

   if (use_staging_texture) {
      struct pipe_resource resource;
      struct si_texture *staging;
      unsigned bo_usage = usage & PIPE_MAP_READ ? PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
      unsigned bo_flags = SI_RESOURCE_FLAG_FORCE_LINEAR | SI_RESOURCE_FLAG_DRIVER_INTERNAL;

      si_init_temp_resource_from_box(&resource, texture, box, real_level, bo_usage,
                                     bo_flags);

      /* Since depth-stencil textures don't support linear tiling,
       * blit from ZS to color and vice versa. u_blitter will do
       * the packing for these formats.
       */
      if (tex->is_depth)
         resource.format = util_blitter_get_color_format_for_zs(resource.format);

      /* Create the temporary texture. */
      staging = (struct si_texture *)ctx->screen->resource_create(ctx->screen, &resource);
      if (!staging) {
         PRINT_ERR("failed to create temporary texture to hold untiled copy\n");
         goto fail_trans;
      }
      trans->staging = &staging->buffer;

      /* Just get the strides. */
      si_texture_get_offset(sctx->screen, staging, 0, NULL, &trans->b.b.stride,
                            &trans->b.b.layer_stride);

      if (usage & PIPE_MAP_READ)
         si_copy_to_staging_texture(ctx, trans);
      else
         usage |= PIPE_MAP_UNSYNCHRONIZED;

      buf = trans->staging;
   } else {
      /* the resource is mapped directly */
      offset = si_texture_get_offset(sctx->screen, tex, real_level, box, &trans->b.b.stride,
                                     &trans->b.b.layer_stride);
      buf = &tex->buffer;
   }

   /* Always unmap texture CPU mappings on 32-bit architectures, so that
    * we don't run out of the CPU address space.
    */
   if (sizeof(void *) == 4)
      usage |= RADEON_MAP_TEMPORARY;

   if (!(map = si_buffer_map(sctx, buf, usage)))
      goto fail_trans;

   *ptransfer = &trans->b.b;
   return map + offset;

fail_trans:
   si_resource_reference(&trans->staging, NULL);
   pipe_resource_reference(&trans->b.b.resource, NULL);
   FREE(trans);
   return NULL;
}
1957 
/* pipe_context::texture_unmap implementation.
 *
 * Copies staged writes back to the real texture, releases the staging
 * resource, and occasionally flushes the gfx IB to keep transfer memory
 * usage bounded.
 */
static void si_texture_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer *transfer)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_transfer *stransfer = (struct si_transfer *)transfer;
   struct pipe_resource *texture = transfer->resource;
   struct si_texture *tex = (struct si_texture *)texture;

   /* Always unmap texture CPU mappings on 32-bit architectures, so that
    * we don't run out of the CPU address space.
    */
   if (sizeof(void *) == 4) {
      struct si_resource *buf = stransfer->staging ? stransfer->staging : &tex->buffer;

      sctx->ws->buffer_unmap(sctx->ws, buf->buf);
   }

   /* Staged writes are copied back to the real texture on the GPU. */
   if ((transfer->usage & PIPE_MAP_WRITE) && stransfer->staging)
      si_copy_from_staging_texture(ctx, stransfer);

   if (stransfer->staging) {
      sctx->num_alloc_tex_transfer_bytes += stransfer->staging->buf->size;
      si_resource_reference(&stransfer->staging, NULL);
   }

   /* Heuristic for {upload, draw, upload, draw, ..}:
    *
    * Flush the gfx IB if we've allocated too much texture storage.
    *
    * The idea is that we don't want to build IBs that use too much
    * memory and put pressure on the kernel memory manager and we also
    * want to make temporary and invalidated buffers go idle ASAP to
    * decrease the total memory usage or make them reusable. The memory
    * usage will be slightly higher than given here because of the buffer
    * cache in the winsys.
    *
    * The result is that the kernel memory manager is never a bottleneck.
    */
   if (sctx->num_alloc_tex_transfer_bytes > (uint64_t)sctx->screen->info.gart_size_kb * 1024 / 4) {
      si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
      sctx->num_alloc_tex_transfer_bytes = 0;
   }

   pipe_resource_reference(&transfer->resource, NULL);
   FREE(transfer);
}
2003 
2004 /* Return if it's allowed to reinterpret one format as another with DCC enabled.
2005  */
vi_dcc_formats_compatible(struct si_screen * sscreen,enum pipe_format format1,enum pipe_format format2)2006 bool vi_dcc_formats_compatible(struct si_screen *sscreen, enum pipe_format format1,
2007                                enum pipe_format format2)
2008 {
2009    const struct util_format_description *desc1, *desc2;
2010 
2011    /* All formats are compatible on GFX11. */
2012    if (sscreen->info.gfx_level >= GFX11)
2013       return true;
2014 
2015    /* No format change - exit early. */
2016    if (format1 == format2)
2017       return true;
2018 
2019    format1 = si_simplify_cb_format(format1);
2020    format2 = si_simplify_cb_format(format2);
2021 
2022    /* Check again after format adjustments. */
2023    if (format1 == format2)
2024       return true;
2025 
2026    desc1 = util_format_description(format1);
2027    desc2 = util_format_description(format2);
2028 
2029    if (desc1->layout != UTIL_FORMAT_LAYOUT_PLAIN || desc2->layout != UTIL_FORMAT_LAYOUT_PLAIN)
2030       return false;
2031 
2032    /* Float and non-float are totally incompatible. */
2033    if ((desc1->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) !=
2034        (desc2->channel[0].type == UTIL_FORMAT_TYPE_FLOAT))
2035       return false;
2036 
2037    /* Channel sizes must match across DCC formats.
2038     * Comparing just the first 2 channels should be enough.
2039     */
2040    if (desc1->channel[0].size != desc2->channel[0].size ||
2041        (desc1->nr_channels >= 2 && desc1->channel[1].size != desc2->channel[1].size))
2042       return false;
2043 
2044    /* Everything below is not needed if the driver never uses the DCC
2045     * clear code with the value of 1.
2046     */
2047 
2048    /* If the clear values are all 1 or all 0, this constraint can be
2049     * ignored. */
2050    if (vi_alpha_is_on_msb(sscreen, format1) != vi_alpha_is_on_msb(sscreen, format2))
2051       return false;
2052 
2053    /* Channel types must match if the clear value of 1 is used.
2054     * The type categories are only float, signed, unsigned.
2055     * NORM and INT are always compatible.
2056     */
2057    if (desc1->channel[0].type != desc2->channel[0].type ||
2058        (desc1->nr_channels >= 2 && desc1->channel[1].type != desc2->channel[1].type))
2059       return false;
2060 
2061    return true;
2062 }
2063 
vi_dcc_formats_are_incompatible(struct pipe_resource * tex,unsigned level,enum pipe_format view_format)2064 bool vi_dcc_formats_are_incompatible(struct pipe_resource *tex, unsigned level,
2065                                      enum pipe_format view_format)
2066 {
2067    struct si_texture *stex = (struct si_texture *)tex;
2068 
2069    return vi_dcc_enabled(stex, level) &&
2070           !vi_dcc_formats_compatible((struct si_screen *)tex->screen, tex->format, view_format);
2071 }
2072 
2073 /* This can't be merged with the above function, because
2074  * vi_dcc_formats_compatible should be called only when DCC is enabled. */
vi_disable_dcc_if_incompatible_format(struct si_context * sctx,struct pipe_resource * tex,unsigned level,enum pipe_format view_format)2075 void vi_disable_dcc_if_incompatible_format(struct si_context *sctx, struct pipe_resource *tex,
2076                                            unsigned level, enum pipe_format view_format)
2077 {
2078    struct si_texture *stex = (struct si_texture *)tex;
2079 
2080    if (vi_dcc_formats_are_incompatible(tex, level, view_format))
2081       if (!si_texture_disable_dcc(sctx, stex))
2082          si_decompress_dcc(sctx, stex);
2083 }
2084 
si_create_surface(struct pipe_context * pipe,struct pipe_resource * tex,const struct pipe_surface * templ)2085 static struct pipe_surface *si_create_surface(struct pipe_context *pipe, struct pipe_resource *tex,
2086                                               const struct pipe_surface *templ)
2087 {
2088    unsigned level = templ->u.tex.level;
2089    unsigned width = u_minify(tex->width0, level);
2090    unsigned height = u_minify(tex->height0, level);
2091    unsigned width0 = tex->width0;
2092    unsigned height0 = tex->height0;
2093 
2094    if (tex->target != PIPE_BUFFER && templ->format != tex->format) {
2095       const struct util_format_description *tex_desc = util_format_description(tex->format);
2096       const struct util_format_description *templ_desc = util_format_description(templ->format);
2097 
2098       assert(tex_desc->block.bits == templ_desc->block.bits);
2099 
2100       /* Adjust size of surface if and only if the block width or
2101        * height is changed. */
2102       if (tex_desc->block.width != templ_desc->block.width ||
2103           tex_desc->block.height != templ_desc->block.height) {
2104          unsigned nblks_x = util_format_get_nblocksx(tex->format, width);
2105          unsigned nblks_y = util_format_get_nblocksy(tex->format, height);
2106 
2107          width = nblks_x * templ_desc->block.width;
2108          height = nblks_y * templ_desc->block.height;
2109 
2110          width0 = util_format_get_nblocksx(tex->format, width0);
2111          height0 = util_format_get_nblocksy(tex->format, height0);
2112       }
2113    }
2114 
2115    struct si_surface *surface = CALLOC_STRUCT(si_surface);
2116 
2117    if (!surface)
2118       return NULL;
2119 
2120    assert(templ->u.tex.first_layer <= util_max_layer(tex, templ->u.tex.level));
2121    assert(templ->u.tex.last_layer <= util_max_layer(tex, templ->u.tex.level));
2122 
2123    pipe_reference_init(&surface->base.reference, 1);
2124    pipe_resource_reference(&surface->base.texture, tex);
2125    surface->base.context = pipe;
2126    surface->base.format = templ->format;
2127    surface->base.width = width;
2128    surface->base.height = height;
2129    surface->base.u = templ->u;
2130 
2131    surface->width0 = width0;
2132    surface->height0 = height0;
2133 
2134    surface->dcc_incompatible =
2135       tex->target != PIPE_BUFFER &&
2136       vi_dcc_formats_are_incompatible(tex, templ->u.tex.level, templ->format);
2137    return &surface->base;
2138 }
2139 
si_surface_destroy(struct pipe_context * pipe,struct pipe_surface * surface)2140 static void si_surface_destroy(struct pipe_context *pipe, struct pipe_surface *surface)
2141 {
2142    pipe_resource_reference(&surface->texture, NULL);
2143    FREE(surface);
2144 }
2145 
/* Translate a pipe_format's channel ordering into the hardware color-swap
 * encoding (V_028C70_SWAP_*). Returns ~0U for orderings the hardware
 * cannot express. The matching below is order-sensitive: earlier cases
 * deliberately shadow later ones, so do not reorder the branches. */
unsigned si_translate_colorswap(enum amd_gfx_level gfx_level, enum pipe_format format,
                                bool do_endian_swap)
{
   const struct util_format_description *desc = util_format_description(format);

#define HAS_SWIZZLE(chan, swz) (desc->swizzle[chan] == PIPE_SWIZZLE_##swz)

   if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
      return V_028C70_SWAP_STD;

   /* NOTE(review): gfx10.3+ apparently treats R9G9B9E5 as a renderable
    * packed format with standard swap — older chips fall through to the
    * generic rejection below. */
   if (gfx_level >= GFX10_3 &&
       format == PIPE_FORMAT_R9G9B9E5_FLOAT) /* isn't plain */
      return V_028C70_SWAP_STD;

   /* Only plain (non-compressed, non-subsampled) layouts are mappable. */
   if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
      return ~0U;

   switch (desc->nr_channels) {
   case 1:
      if (HAS_SWIZZLE(0, X))
         return V_028C70_SWAP_STD; /* X___ */
      else if (HAS_SWIZZLE(3, X))
         return V_028C70_SWAP_ALT_REV; /* ___X */
      break;
   case 2:
      /* A channel may be NONE (e.g. LA/AL formats); treat it as a
       * wildcard when matching the XY vs YX ordering. */
      if ((HAS_SWIZZLE(0, X) && HAS_SWIZZLE(1, Y)) || (HAS_SWIZZLE(0, X) && HAS_SWIZZLE(1, NONE)) ||
          (HAS_SWIZZLE(0, NONE) && HAS_SWIZZLE(1, Y)))
         return V_028C70_SWAP_STD; /* XY__ */
      else if ((HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(1, X)) ||
               (HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(1, NONE)) ||
               (HAS_SWIZZLE(0, NONE) && HAS_SWIZZLE(1, X)))
         /* YX__ */
         return (do_endian_swap ? V_028C70_SWAP_STD : V_028C70_SWAP_STD_REV);
      else if (HAS_SWIZZLE(0, X) && HAS_SWIZZLE(3, Y))
         return V_028C70_SWAP_ALT; /* X__Y */
      else if (HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(3, X))
         return V_028C70_SWAP_ALT_REV; /* Y__X */
      break;
   case 3:
      if (HAS_SWIZZLE(0, X))
         return (do_endian_swap ? V_028C70_SWAP_STD_REV : V_028C70_SWAP_STD);
      else if (HAS_SWIZZLE(0, Z))
         return V_028C70_SWAP_STD_REV; /* ZYX */
      break;
   case 4:
      /* check the middle channels, the 1st and 4th channel can be NONE */
      if (HAS_SWIZZLE(1, Y) && HAS_SWIZZLE(2, Z)) {
         return V_028C70_SWAP_STD; /* XYZW */
      } else if (HAS_SWIZZLE(1, Z) && HAS_SWIZZLE(2, Y)) {
         return V_028C70_SWAP_STD_REV; /* WZYX */
      } else if (HAS_SWIZZLE(1, Y) && HAS_SWIZZLE(2, X)) {
         return V_028C70_SWAP_ALT; /* ZYXW */
      } else if (HAS_SWIZZLE(1, Z) && HAS_SWIZZLE(2, W)) {
         /* YZWX */
         if (desc->is_array)
            return V_028C70_SWAP_ALT_REV;
         else
            return (do_endian_swap ? V_028C70_SWAP_ALT : V_028C70_SWAP_ALT_REV);
      }
      break;
   }
   /* Unrecognized channel ordering. */
   return ~0U;
}
2209 
2210 static struct pipe_memory_object *
si_memobj_from_handle(struct pipe_screen * screen,struct winsys_handle * whandle,bool dedicated)2211 si_memobj_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle, bool dedicated)
2212 {
2213    struct si_screen *sscreen = (struct si_screen *)screen;
2214    struct si_memory_object *memobj = CALLOC_STRUCT(si_memory_object);
2215    struct pb_buffer *buf = NULL;
2216 
2217    if (!memobj)
2218       return NULL;
2219 
2220    buf = sscreen->ws->buffer_from_handle(sscreen->ws, whandle, sscreen->info.max_alignment, false);
2221    if (!buf) {
2222       free(memobj);
2223       return NULL;
2224    }
2225 
2226    memobj->b.dedicated = dedicated;
2227    memobj->buf = buf;
2228    memobj->stride = whandle->stride;
2229 
2230    return (struct pipe_memory_object *)memobj;
2231 }
2232 
si_memobj_destroy(struct pipe_screen * screen,struct pipe_memory_object * _memobj)2233 static void si_memobj_destroy(struct pipe_screen *screen, struct pipe_memory_object *_memobj)
2234 {
2235    struct si_memory_object *memobj = (struct si_memory_object *)_memobj;
2236 
2237    radeon_bo_reference(((struct si_screen*)screen)->ws, &memobj->buf, NULL);
2238    free(memobj);
2239 }
2240 
/* Create a buffer or texture resource on top of an imported memory object
 * at the given byte offset. The resource holds its own reference to the
 * underlying winsys buffer. Returns NULL on failure. */
static struct pipe_resource *si_resource_from_memobj(struct pipe_screen *screen,
                                                    const struct pipe_resource *templ,
                                                    struct pipe_memory_object *_memobj,
                                                    uint64_t offset)
{
   struct si_screen *sscreen = (struct si_screen *)screen;
   struct si_memory_object *memobj = (struct si_memory_object *)_memobj;
   struct pipe_resource *result;

   if (templ->target == PIPE_BUFFER) {
      result = si_buffer_from_winsys_buffer(screen, templ, memobj->buf, offset);
   } else {
      result = si_texture_from_winsys_buffer(sscreen, templ, memobj->buf,
                                             memobj->stride,
                                             offset, DRM_FORMAT_MOD_INVALID,
                                             PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE | PIPE_HANDLE_USAGE_SHADER_WRITE,
                                             memobj->b.dedicated);
   }

   if (!result)
      return NULL;

   /* si_texture_from_winsys_buffer doesn't increment refcount of
    * memobj->buf, so increment it here.
    */
   struct pb_buffer *extra_ref = NULL;
   radeon_bo_reference(sscreen->ws, &extra_ref, memobj->buf);
   return result;
}
2269 
si_check_resource_capability(struct pipe_screen * screen,struct pipe_resource * resource,unsigned bind)2270 static bool si_check_resource_capability(struct pipe_screen *screen, struct pipe_resource *resource,
2271                                          unsigned bind)
2272 {
2273    struct si_texture *tex = (struct si_texture *)resource;
2274 
2275    /* Buffers only support the linear flag. */
2276    if (resource->target == PIPE_BUFFER)
2277       return (bind & ~PIPE_BIND_LINEAR) == 0;
2278 
2279    if (bind & PIPE_BIND_LINEAR && !tex->surface.is_linear)
2280       return false;
2281 
2282    if (bind & PIPE_BIND_SCANOUT && !tex->surface.is_displayable)
2283       return false;
2284 
2285    /* TODO: PIPE_BIND_CURSOR - do we care? */
2286    return true;
2287 }
2288 
/* Return the number of supported sparse-texture virtual page sizes (0 or 1)
 * and, when requested, write the page dimensions for the given target/format
 * into *x/*y/*z. Only one page size (64KB tile) per bpp class is exposed. */
static int si_get_sparse_texture_virtual_page_size(struct pipe_screen *screen,
                                                   enum pipe_texture_target target,
                                                   bool multi_sample,
                                                   enum pipe_format format,
                                                   unsigned offset, unsigned size,
                                                   int *x, int *y, int *z)
{
   struct si_screen *sscreen = (struct si_screen *)screen;

   /* Only one page-size entry exists, so any nonzero query offset is out
    * of range. */
   if (offset != 0)
      return 0;

   /* Page dimensions indexed by log2(bytes per block). */
   static const int pages_2d[][3] = {
      { 256, 256, 1 }, /* 8bpp   */
      { 256, 128, 1 }, /* 16bpp  */
      { 128, 128, 1 }, /* 32bpp  */
      { 128, 64,  1 }, /* 64bpp  */
      { 64,  64,  1 }, /* 128bpp */
   };
   static const int pages_3d[][3] = {
      { 64,  32,  32 }, /* 8bpp   */
      { 32,  32,  32 }, /* 16bpp  */
      { 32,  32,  16 }, /* 32bpp  */
      { 32,  16,  16 }, /* 64bpp  */
      { 16,  16,  16 }, /* 128bpp */
   };

   const int (*pages)[3];

   /* Pick the table for the target; unsupported targets get no page size. */
   switch (target) {
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_RECT:
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_CUBE_ARRAY:
      pages = pages_2d;
      break;
   case PIPE_TEXTURE_3D:
      pages = pages_3d;
      break;
   default:
      return 0;
   }

   /* ARB_sparse_texture2 needs to query supported virtual page x/y/z without
    * knowing the actual sample count. So we need to return a fixed virtual page
    * x/y/z for all sample counts, which means the virtual page size cannot be
    * fixed to 64KB.
    *
    * Only enabled for GFX9. GFX10+ removed MS texture support. By specification
    * ARB_sparse_texture2 needs MS texture support, but we relax it by just
    * returning no page size for GFX10+ to keep the shader query capability.
    */
   if (multi_sample && sscreen->info.gfx_level != GFX9)
      return 0;

   /* Unsupported formats. TODO: support these formats. */
   if (util_format_is_depth_or_stencil(format) ||
       util_format_get_num_planes(format) > 1 ||
       util_format_is_compressed(format))
      return 0;

   int bpe = util_format_get_blocksize(format);
   /* We don't support any non-power-of-two bpp formats, so
    * pipe_screen->is_format_supported() should already filter out these formats.
    */
   assert(util_is_power_of_two_nonzero(bpe));

   if (size) {
      unsigned row = util_logbase2(bpe);
      if (x) *x = pages[row][0];
      if (y) *y = pages[row][1];
      if (z) *z = pages[row][2];
   }

   return 1;
}
2369 
si_init_screen_texture_functions(struct si_screen * sscreen)2370 void si_init_screen_texture_functions(struct si_screen *sscreen)
2371 {
2372    sscreen->b.resource_from_handle = si_texture_from_handle;
2373    sscreen->b.resource_get_handle = si_texture_get_handle;
2374    sscreen->b.resource_get_param = si_resource_get_param;
2375    sscreen->b.resource_get_info = si_texture_get_info;
2376    sscreen->b.resource_from_memobj = si_resource_from_memobj;
2377    sscreen->b.memobj_create_from_handle = si_memobj_from_handle;
2378    sscreen->b.memobj_destroy = si_memobj_destroy;
2379    sscreen->b.check_resource_capability = si_check_resource_capability;
2380    sscreen->b.get_sparse_texture_virtual_page_size =
2381       si_get_sparse_texture_virtual_page_size;
2382 
2383    /* By not setting it the frontend will fall back to non-modifier create,
2384     * which works around some applications using modifiers that are not
2385     * allowed in combination with lack of error reporting in
2386     * gbm_dri_surface_create */
2387    if (sscreen->info.gfx_level >= GFX9 && sscreen->info.kernel_has_modifiers) {
2388       sscreen->b.resource_create_with_modifiers = si_texture_create_with_modifiers;
2389       sscreen->b.query_dmabuf_modifiers = si_query_dmabuf_modifiers;
2390       sscreen->b.is_dmabuf_modifier_supported = si_is_dmabuf_modifier_supported;
2391       sscreen->b.get_dmabuf_modifier_planes = si_get_dmabuf_modifier_planes;
2392    }
2393 }
2394 
si_init_context_texture_functions(struct si_context * sctx)2395 void si_init_context_texture_functions(struct si_context *sctx)
2396 {
2397    sctx->b.texture_map = si_texture_transfer_map;
2398    sctx->b.texture_unmap = si_texture_transfer_unmap;
2399    sctx->b.create_surface = si_create_surface;
2400    sctx->b.surface_destroy = si_surface_destroy;
2401 }
2402