• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3  * Copyright 2018 Advanced Micro Devices, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * on the rights to use, copy, modify, merge, publish, distribute, sub
10  * license, and/or sell copies of the Software, and to permit persons to whom
11  * the Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23  * USE OR OTHER DEALINGS IN THE SOFTWARE.
24  */
25 
26 #include "drm-uapi/drm_fourcc.h"
27 #include "si_pipe.h"
28 #include "si_query.h"
29 #include "sid.h"
30 #include "frontend/drm_driver.h"
31 #include "util/format/u_format.h"
32 #include "util/os_time.h"
33 #include "util/u_log.h"
34 #include "util/u_memory.h"
35 #include "util/u_pack_color.h"
36 #include "util/u_resource.h"
37 #include "util/u_surface.h"
38 #include "util/u_transfer.h"
39 
40 #include <errno.h>
41 #include <inttypes.h>
42 
43 #include "amd/addrlib/inc/addrinterface.h"
44 
45 static enum radeon_surf_mode si_choose_tiling(struct si_screen *sscreen,
46                                               const struct pipe_resource *templ,
47                                               bool tc_compatible_htile);
48 
49 static bool si_texture_is_aux_plane(const struct pipe_resource *resource);
50 
51 /* Same as resource_copy_region, except that both upsampling and downsampling are allowed. */
si_copy_region_with_blit(struct pipe_context * pipe,struct pipe_resource * dst,unsigned dst_level,unsigned dstx,unsigned dsty,unsigned dstz,struct pipe_resource * src,unsigned src_level,const struct pipe_box * src_box)52 static void si_copy_region_with_blit(struct pipe_context *pipe, struct pipe_resource *dst,
53                                      unsigned dst_level, unsigned dstx, unsigned dsty,
54                                      unsigned dstz, struct pipe_resource *src, unsigned src_level,
55                                      const struct pipe_box *src_box)
56 {
57    struct pipe_blit_info blit;
58 
59    memset(&blit, 0, sizeof(blit));
60    blit.src.resource = src;
61    blit.src.format = src->format;
62    blit.src.level = src_level;
63    blit.src.box = *src_box;
64    blit.dst.resource = dst;
65    blit.dst.format = dst->format;
66    blit.dst.level = dst_level;
67    blit.dst.box.x = dstx;
68    blit.dst.box.y = dsty;
69    blit.dst.box.z = dstz;
70    blit.dst.box.width = src_box->width;
71    blit.dst.box.height = src_box->height;
72    blit.dst.box.depth = src_box->depth;
73    blit.mask = util_format_get_mask(dst->format);
74    blit.filter = PIPE_TEX_FILTER_NEAREST;
75 
76    if (blit.mask) {
77       pipe->blit(pipe, &blit);
78    }
79 }
80 
81 /* Copy from a full GPU texture to a transfer's staging one. */
si_copy_to_staging_texture(struct pipe_context * ctx,struct si_transfer * stransfer)82 static void si_copy_to_staging_texture(struct pipe_context *ctx, struct si_transfer *stransfer)
83 {
84    struct pipe_transfer *transfer = (struct pipe_transfer *)stransfer;
85    struct pipe_resource *dst = &stransfer->staging->b.b;
86    struct pipe_resource *src = transfer->resource;
87 
88    if (src->nr_samples > 1 || ((struct si_texture *)src)->is_depth) {
89       si_copy_region_with_blit(ctx, dst, 0, 0, 0, 0, src, transfer->level, &transfer->box);
90       return;
91    }
92 
93    si_resource_copy_region(ctx, dst, 0, 0, 0, 0, src, transfer->level, &transfer->box);
94 }
95 
96 /* Copy from a transfer's staging texture to a full GPU one. */
si_copy_from_staging_texture(struct pipe_context * ctx,struct si_transfer * stransfer)97 static void si_copy_from_staging_texture(struct pipe_context *ctx, struct si_transfer *stransfer)
98 {
99    struct pipe_transfer *transfer = (struct pipe_transfer *)stransfer;
100    struct pipe_resource *dst = transfer->resource;
101    struct pipe_resource *src = &stransfer->staging->b.b;
102    struct pipe_box sbox;
103 
104    u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, transfer->box.depth, &sbox);
105 
106    if (dst->nr_samples > 1 || ((struct si_texture *)dst)->is_depth) {
107       si_copy_region_with_blit(ctx, dst, transfer->level, transfer->box.x, transfer->box.y,
108                                transfer->box.z, src, 0, &sbox);
109       return;
110    }
111 
112    if (util_format_is_compressed(dst->format)) {
113       sbox.width = util_format_get_nblocksx(dst->format, sbox.width);
114       sbox.height = util_format_get_nblocksx(dst->format, sbox.height);
115    }
116 
117    si_resource_copy_region(ctx, dst, transfer->level, transfer->box.x, transfer->box.y,
118                            transfer->box.z, src, 0, &sbox);
119 }
120 
si_texture_get_offset(struct si_screen * sscreen,struct si_texture * tex,unsigned level,const struct pipe_box * box,unsigned * stride,unsigned * layer_stride)121 static unsigned si_texture_get_offset(struct si_screen *sscreen, struct si_texture *tex,
122                                       unsigned level, const struct pipe_box *box, unsigned *stride,
123                                       unsigned *layer_stride)
124 {
125    if (sscreen->info.chip_class >= GFX9) {
126       *stride = tex->surface.u.gfx9.surf_pitch * tex->surface.bpe;
127       *layer_stride = tex->surface.u.gfx9.surf_slice_size;
128 
129       if (!box)
130          return 0;
131 
132       /* Each texture is an array of slices. Each slice is an array
133        * of mipmap levels. */
134       return tex->surface.u.gfx9.surf_offset + box->z * tex->surface.u.gfx9.surf_slice_size +
135              tex->surface.u.gfx9.offset[level] +
136              (box->y / tex->surface.blk_h * tex->surface.u.gfx9.surf_pitch +
137               box->x / tex->surface.blk_w) *
138                 tex->surface.bpe;
139    } else {
140       *stride = tex->surface.u.legacy.level[level].nblk_x * tex->surface.bpe;
141       assert((uint64_t)tex->surface.u.legacy.level[level].slice_size_dw * 4 <= UINT_MAX);
142       *layer_stride = (uint64_t)tex->surface.u.legacy.level[level].slice_size_dw * 4;
143 
144       if (!box)
145          return (uint64_t)tex->surface.u.legacy.level[level].offset_256B * 256;
146 
147       /* Each texture is an array of mipmap levels. Each level is
148        * an array of slices. */
149       return (uint64_t)tex->surface.u.legacy.level[level].offset_256B * 256 +
150              box->z * (uint64_t)tex->surface.u.legacy.level[level].slice_size_dw * 4 +
151              (box->y / tex->surface.blk_h * tex->surface.u.legacy.level[level].nblk_x +
152               box->x / tex->surface.blk_w) *
153                 tex->surface.bpe;
154    }
155 }
156 
si_init_surface(struct si_screen * sscreen,struct radeon_surf * surface,const struct pipe_resource * ptex,enum radeon_surf_mode array_mode,uint64_t modifier,bool is_imported,bool is_scanout,bool is_flushed_depth,bool tc_compatible_htile)157 static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surface,
158                            const struct pipe_resource *ptex, enum radeon_surf_mode array_mode,
159                            uint64_t modifier, bool is_imported, bool is_scanout,
160                            bool is_flushed_depth, bool tc_compatible_htile)
161 {
162    const struct util_format_description *desc = util_format_description(ptex->format);
163    bool is_depth, is_stencil;
164    int r;
165    unsigned bpe;
166    uint64_t flags = 0;
167 
168    is_depth = util_format_has_depth(desc);
169    is_stencil = util_format_has_stencil(desc);
170 
171    if (!is_flushed_depth && ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
172       bpe = 4; /* stencil is allocated separately */
173    } else {
174       bpe = util_format_get_blocksize(ptex->format);
175       assert(util_is_power_of_two_or_zero(bpe));
176    }
177 
178    if (!is_flushed_depth && is_depth) {
179       flags |= RADEON_SURF_ZBUFFER;
180 
181       if ((sscreen->debug_flags & DBG(NO_HYPERZ)) ||
182           (ptex->bind & PIPE_BIND_SHARED) || is_imported) {
183          flags |= RADEON_SURF_NO_HTILE;
184       } else if (tc_compatible_htile &&
185                  (sscreen->info.chip_class >= GFX9 || array_mode == RADEON_SURF_MODE_2D)) {
186          /* TC-compatible HTILE only supports Z32_FLOAT.
187           * GFX9 also supports Z16_UNORM.
188           * On GFX8, promote Z16 to Z32. DB->CB copies will convert
189           * the format for transfers.
190           */
191          if (sscreen->info.chip_class == GFX8)
192             bpe = 4;
193 
194          flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
195       }
196 
197       if (is_stencil)
198          flags |= RADEON_SURF_SBUFFER;
199    }
200 
201    /* Disable DCC? */
202    if (sscreen->info.chip_class >= GFX8) {
203       /* Global options that disable DCC. */
204       if (ptex->flags & SI_RESOURCE_FLAG_DISABLE_DCC)
205          flags |= RADEON_SURF_DISABLE_DCC;
206 
207       if (ptex->nr_samples >= 2 && sscreen->debug_flags & DBG(NO_DCC_MSAA))
208          flags |= RADEON_SURF_DISABLE_DCC;
209 
210       /* Shared textures must always set up DCC. If it's not present, it will be disabled by
211        * si_get_opaque_metadata later.
212        */
213       if (!is_imported &&
214           (sscreen->debug_flags & DBG(NO_DCC) ||
215            (ptex->bind & PIPE_BIND_SCANOUT && sscreen->debug_flags & DBG(NO_DISPLAY_DCC))))
216          flags |= RADEON_SURF_DISABLE_DCC;
217 
218       /* R9G9B9E5 isn't supported for rendering by older generations. */
219       if (sscreen->info.chip_class < GFX10_3 &&
220           ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT)
221          flags |= RADEON_SURF_DISABLE_DCC;
222 
223       switch (sscreen->info.chip_class) {
224       case GFX8:
225          /* Stoney: 128bpp MSAA textures randomly fail piglit tests with DCC. */
226          if (sscreen->info.family == CHIP_STONEY && bpe == 16 && ptex->nr_samples >= 2)
227             flags |= RADEON_SURF_DISABLE_DCC;
228 
229          /* DCC clear for 4x and 8x MSAA array textures unimplemented. */
230          if (ptex->nr_storage_samples >= 4 && ptex->array_size > 1)
231             flags |= RADEON_SURF_DISABLE_DCC;
232          break;
233 
234       case GFX9:
235          /* DCC MSAA fails this on Raven:
236           *    https://www.khronos.org/registry/webgl/sdk/tests/deqp/functional/gles3/fbomultisample.2_samples.html
237           * and this on Picasso:
238           *    https://www.khronos.org/registry/webgl/sdk/tests/deqp/functional/gles3/fbomultisample.4_samples.html
239           */
240          if (sscreen->info.family == CHIP_RAVEN && ptex->nr_storage_samples >= 2 && bpe < 4)
241             flags |= RADEON_SURF_DISABLE_DCC;
242          break;
243 
244       case GFX10:
245       case GFX10_3:
246          /* DCC causes corruption with MSAA. */
247          if (ptex->nr_storage_samples >= 2)
248             flags |= RADEON_SURF_DISABLE_DCC;
249          break;
250 
251       default:
252          assert(0);
253       }
254    }
255 
256    if (is_scanout) {
257       /* This should catch bugs in gallium users setting incorrect flags. */
258       assert(ptex->nr_samples <= 1 && ptex->array_size == 1 && ptex->depth0 == 1 &&
259              ptex->last_level == 0 && !(flags & RADEON_SURF_Z_OR_SBUFFER));
260 
261       flags |= RADEON_SURF_SCANOUT;
262    }
263 
264    if (ptex->bind & PIPE_BIND_SHARED)
265       flags |= RADEON_SURF_SHAREABLE;
266    if (is_imported)
267       flags |= RADEON_SURF_IMPORTED | RADEON_SURF_SHAREABLE;
268    if (sscreen->debug_flags & DBG(NO_FMASK))
269       flags |= RADEON_SURF_NO_FMASK;
270 
271    if (sscreen->info.chip_class == GFX9 && (ptex->flags & SI_RESOURCE_FLAG_FORCE_MICRO_TILE_MODE)) {
272       flags |= RADEON_SURF_FORCE_MICRO_TILE_MODE;
273       surface->micro_tile_mode = SI_RESOURCE_FLAG_MICRO_TILE_MODE_GET(ptex->flags);
274    }
275 
276    if (ptex->flags & SI_RESOURCE_FLAG_FORCE_MSAA_TILING) {
277       flags |= RADEON_SURF_FORCE_SWIZZLE_MODE;
278 
279       if (sscreen->info.chip_class >= GFX10)
280          surface->u.gfx9.swizzle_mode = ADDR_SW_64KB_R_X;
281    }
282 
283    surface->modifier = modifier;
284 
285    r = sscreen->ws->surface_init(sscreen->ws, ptex, flags, bpe, array_mode, surface);
286    if (r) {
287       return r;
288    }
289 
290    return 0;
291 }
292 
si_eliminate_fast_color_clear(struct si_context * sctx,struct si_texture * tex,bool * ctx_flushed)293 void si_eliminate_fast_color_clear(struct si_context *sctx, struct si_texture *tex,
294                                    bool *ctx_flushed)
295 {
296    struct si_screen *sscreen = sctx->screen;
297    struct pipe_context *ctx = &sctx->b;
298 
299    if (ctx == sscreen->aux_context)
300       simple_mtx_lock(&sscreen->aux_context_lock);
301 
302    unsigned n = sctx->num_decompress_calls;
303    ctx->flush_resource(ctx, &tex->buffer.b.b);
304 
305    /* Flush only if any fast clear elimination took place. */
306    bool flushed = false;
307    if (n != sctx->num_decompress_calls)
308    {
309       ctx->flush(ctx, NULL, 0);
310       flushed = true;
311    }
312    if (ctx_flushed)
313       *ctx_flushed = flushed;
314 
315    if (ctx == sscreen->aux_context)
316       simple_mtx_unlock(&sscreen->aux_context_lock);
317 }
318 
si_texture_discard_cmask(struct si_screen * sscreen,struct si_texture * tex)319 void si_texture_discard_cmask(struct si_screen *sscreen, struct si_texture *tex)
320 {
321    if (!tex->cmask_buffer)
322       return;
323 
324    assert(tex->buffer.b.b.nr_samples <= 1);
325 
326    /* Disable CMASK. */
327    tex->cmask_base_address_reg = tex->buffer.gpu_address >> 8;
328    tex->dirty_level_mask = 0;
329 
330    tex->cb_color_info &= ~S_028C70_FAST_CLEAR(1);
331 
332    if (tex->cmask_buffer != &tex->buffer)
333       si_resource_reference(&tex->cmask_buffer, NULL);
334 
335    tex->cmask_buffer = NULL;
336 
337    /* Notify all contexts about the change. */
338    p_atomic_inc(&sscreen->dirty_tex_counter);
339    p_atomic_inc(&sscreen->compressed_colortex_counter);
340 }
341 
si_can_disable_dcc(struct si_texture * tex)342 static bool si_can_disable_dcc(struct si_texture *tex)
343 {
344    /* We can't disable DCC if it can be written by another process. */
345    return !tex->is_depth &&
346           tex->surface.meta_offset &&
347           (!tex->buffer.b.is_shared ||
348            !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE)) &&
349           !ac_modifier_has_dcc(tex->surface.modifier);
350 }
351 
si_texture_discard_dcc(struct si_screen * sscreen,struct si_texture * tex)352 static bool si_texture_discard_dcc(struct si_screen *sscreen, struct si_texture *tex)
353 {
354    if (!si_can_disable_dcc(tex))
355       return false;
356 
357    /* Disable DCC. */
358    ac_surface_zero_dcc_fields(&tex->surface);
359 
360    /* Notify all contexts about the change. */
361    p_atomic_inc(&sscreen->dirty_tex_counter);
362    return true;
363 }
364 
365 /**
366  * Disable DCC for the texture. (first decompress, then discard metadata).
367  *
368  * There is unresolved multi-context synchronization issue between
369  * screen::aux_context and the current context. If applications do this with
370  * multiple contexts, it's already undefined behavior for them and we don't
371  * have to worry about that. The scenario is:
372  *
373  * If context 1 disables DCC and context 2 has queued commands that write
374  * to the texture via CB with DCC enabled, and the order of operations is
375  * as follows:
376  *   context 2 queues draw calls rendering to the texture, but doesn't flush
377  *   context 1 disables DCC and flushes
378  *   context 1 & 2 reset descriptors and FB state
379  *   context 2 flushes (new compressed tiles written by the draw calls)
380  *   context 1 & 2 read garbage, because DCC is disabled, yet there are
381  *   compressed tiled
382  *
383  * \param sctx  the current context if you have one, or sscreen->aux_context
384  *              if you don't.
385  */
si_texture_disable_dcc(struct si_context * sctx,struct si_texture * tex)386 bool si_texture_disable_dcc(struct si_context *sctx, struct si_texture *tex)
387 {
388    struct si_screen *sscreen = sctx->screen;
389 
390    if (!sctx->has_graphics)
391       return si_texture_discard_dcc(sscreen, tex);
392 
393    if (!si_can_disable_dcc(tex))
394       return false;
395 
396    if (&sctx->b == sscreen->aux_context)
397       simple_mtx_lock(&sscreen->aux_context_lock);
398 
399    /* Decompress DCC. */
400    si_decompress_dcc(sctx, tex);
401    sctx->b.flush(&sctx->b, NULL, 0);
402 
403    if (&sctx->b == sscreen->aux_context)
404       simple_mtx_unlock(&sscreen->aux_context_lock);
405 
406    return si_texture_discard_dcc(sscreen, tex);
407 }
408 
si_reallocate_texture_inplace(struct si_context * sctx,struct si_texture * tex,unsigned new_bind_flag,bool invalidate_storage)409 static void si_reallocate_texture_inplace(struct si_context *sctx, struct si_texture *tex,
410                                           unsigned new_bind_flag, bool invalidate_storage)
411 {
412    struct pipe_screen *screen = sctx->b.screen;
413    struct si_texture *new_tex;
414    struct pipe_resource templ = tex->buffer.b.b;
415    unsigned i;
416 
417    templ.bind |= new_bind_flag;
418 
419    if (tex->buffer.b.is_shared || tex->num_planes > 1)
420       return;
421 
422    if (new_bind_flag == PIPE_BIND_LINEAR) {
423       if (tex->surface.is_linear)
424          return;
425 
426       /* This fails with MSAA, depth, and compressed textures. */
427       if (si_choose_tiling(sctx->screen, &templ, false) != RADEON_SURF_MODE_LINEAR_ALIGNED)
428          return;
429    }
430 
431    new_tex = (struct si_texture *)screen->resource_create(screen, &templ);
432    if (!new_tex)
433       return;
434 
435    /* Copy the pixels to the new texture. */
436    if (!invalidate_storage) {
437       for (i = 0; i <= templ.last_level; i++) {
438          struct pipe_box box;
439 
440          u_box_3d(0, 0, 0, u_minify(templ.width0, i), u_minify(templ.height0, i),
441                   util_num_layers(&templ, i), &box);
442 
443          si_resource_copy_region(&sctx->b, &new_tex->buffer.b.b,
444                                  i, 0, 0, 0, &tex->buffer.b.b, i, &box);
445       }
446    }
447 
448    if (new_bind_flag == PIPE_BIND_LINEAR) {
449       si_texture_discard_cmask(sctx->screen, tex);
450       si_texture_discard_dcc(sctx->screen, tex);
451    }
452 
453    /* Replace the structure fields of tex. */
454    tex->buffer.b.b.bind = templ.bind;
455    radeon_bo_reference(sctx->screen->ws, &tex->buffer.buf, new_tex->buffer.buf);
456    tex->buffer.gpu_address = new_tex->buffer.gpu_address;
457    tex->buffer.memory_usage_kb = new_tex->buffer.memory_usage_kb;
458    tex->buffer.bo_size = new_tex->buffer.bo_size;
459    tex->buffer.bo_alignment_log2 = new_tex->buffer.bo_alignment_log2;
460    tex->buffer.domains = new_tex->buffer.domains;
461    tex->buffer.flags = new_tex->buffer.flags;
462 
463    tex->surface = new_tex->surface;
464    si_texture_reference(&tex->flushed_depth_texture, new_tex->flushed_depth_texture);
465 
466    tex->surface.fmask_offset = new_tex->surface.fmask_offset;
467    tex->surface.cmask_offset = new_tex->surface.cmask_offset;
468    tex->cmask_base_address_reg = new_tex->cmask_base_address_reg;
469 
470    if (tex->cmask_buffer == &tex->buffer)
471       tex->cmask_buffer = NULL;
472    else
473       si_resource_reference(&tex->cmask_buffer, NULL);
474 
475    if (new_tex->cmask_buffer == &new_tex->buffer)
476       tex->cmask_buffer = &tex->buffer;
477    else
478       si_resource_reference(&tex->cmask_buffer, new_tex->cmask_buffer);
479 
480    tex->surface.meta_offset = new_tex->surface.meta_offset;
481    tex->cb_color_info = new_tex->cb_color_info;
482    memcpy(tex->color_clear_value, new_tex->color_clear_value, sizeof(tex->color_clear_value));
483    tex->last_msaa_resolve_target_micro_mode = new_tex->last_msaa_resolve_target_micro_mode;
484 
485    memcpy(tex->depth_clear_value, new_tex->depth_clear_value, sizeof(tex->depth_clear_value));
486    tex->dirty_level_mask = new_tex->dirty_level_mask;
487    tex->stencil_dirty_level_mask = new_tex->stencil_dirty_level_mask;
488    tex->db_render_format = new_tex->db_render_format;
489    memcpy(tex->stencil_clear_value, new_tex->stencil_clear_value, sizeof(tex->stencil_clear_value));
490    tex->tc_compatible_htile = new_tex->tc_compatible_htile;
491    tex->depth_cleared_level_mask_once = new_tex->depth_cleared_level_mask_once;
492    tex->stencil_cleared_level_mask = new_tex->stencil_cleared_level_mask;
493    tex->upgraded_depth = new_tex->upgraded_depth;
494    tex->db_compatible = new_tex->db_compatible;
495    tex->can_sample_z = new_tex->can_sample_z;
496    tex->can_sample_s = new_tex->can_sample_s;
497 
498    tex->displayable_dcc_dirty = new_tex->displayable_dcc_dirty;
499 
500    if (new_bind_flag == PIPE_BIND_LINEAR) {
501       assert(!tex->surface.meta_offset);
502       assert(!tex->cmask_buffer);
503       assert(!tex->surface.fmask_size);
504       assert(!tex->is_depth);
505    }
506 
507    si_texture_reference(&new_tex, NULL);
508 
509    p_atomic_inc(&sctx->screen->dirty_tex_counter);
510 }
511 
si_set_tex_bo_metadata(struct si_screen * sscreen,struct si_texture * tex)512 static void si_set_tex_bo_metadata(struct si_screen *sscreen, struct si_texture *tex)
513 {
514    struct pipe_resource *res = &tex->buffer.b.b;
515    struct radeon_bo_metadata md;
516 
517    memset(&md, 0, sizeof(md));
518 
519    assert(tex->surface.fmask_size == 0);
520 
521    static const unsigned char swizzle[] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
522                                            PIPE_SWIZZLE_W};
523    bool is_array = util_texture_is_array(res->target);
524    uint32_t desc[8];
525 
526    sscreen->make_texture_descriptor(sscreen, tex, true, res->target, res->format, swizzle, 0,
527                                     res->last_level, 0, is_array ? res->array_size - 1 : 0,
528                                     res->width0, res->height0, res->depth0, desc, NULL);
529    si_set_mutable_tex_desc_fields(sscreen, tex, &tex->surface.u.legacy.level[0], 0, 0,
530                                   tex->surface.blk_w, false, 0, desc);
531 
532    ac_surface_get_umd_metadata(&sscreen->info, &tex->surface,
533                                tex->buffer.b.b.last_level + 1,
534                                desc, &md.size_metadata, md.metadata);
535    sscreen->ws->buffer_set_metadata(sscreen->ws, tex->buffer.buf, &md, &tex->surface);
536 }
537 
si_displayable_dcc_needs_explicit_flush(struct si_texture * tex)538 static bool si_displayable_dcc_needs_explicit_flush(struct si_texture *tex)
539 {
540    struct si_screen *sscreen = (struct si_screen *)tex->buffer.b.b.screen;
541 
542    if (sscreen->info.chip_class <= GFX8)
543       return false;
544 
545    /* With modifiers and > 1 planes any applications will know that they
546     * cannot do frontbuffer rendering with the texture. */
547    if (ac_surface_get_nplanes(&tex->surface) > 1)
548       return false;
549 
550    return tex->surface.is_displayable && tex->surface.meta_offset;
551 }
552 
si_resource_get_param(struct pipe_screen * screen,struct pipe_context * context,struct pipe_resource * resource,unsigned plane,unsigned layer,unsigned level,enum pipe_resource_param param,unsigned handle_usage,uint64_t * value)553 static bool si_resource_get_param(struct pipe_screen *screen, struct pipe_context *context,
554                                   struct pipe_resource *resource, unsigned plane, unsigned layer,
555                                   unsigned level,
556                                   enum pipe_resource_param param, unsigned handle_usage,
557                                   uint64_t *value)
558 {
559    while (plane && resource->next && !si_texture_is_aux_plane(resource->next)) {
560       --plane;
561       resource = resource->next;
562    }
563 
564    struct si_screen *sscreen = (struct si_screen *)screen;
565    struct si_texture *tex = (struct si_texture *)resource;
566    struct winsys_handle whandle;
567 
568    switch (param) {
569    case PIPE_RESOURCE_PARAM_NPLANES:
570       if (resource->target == PIPE_BUFFER)
571          *value = 1;
572       else if (tex->num_planes > 1)
573          *value = tex->num_planes;
574       else
575          *value = ac_surface_get_nplanes(&tex->surface);
576       return true;
577 
578    case PIPE_RESOURCE_PARAM_STRIDE:
579       if (resource->target == PIPE_BUFFER)
580          *value = 0;
581       else
582          *value = ac_surface_get_plane_stride(sscreen->info.chip_class,
583                                               &tex->surface, plane);
584       return true;
585 
586    case PIPE_RESOURCE_PARAM_OFFSET:
587       if (resource->target == PIPE_BUFFER)
588          *value = 0;
589       else
590          *value = ac_surface_get_plane_offset(sscreen->info.chip_class,
591                                               &tex->surface, plane, layer);
592       return true;
593 
594    case PIPE_RESOURCE_PARAM_MODIFIER:
595       *value = tex->surface.modifier;
596       return true;
597 
598    case PIPE_RESOURCE_PARAM_HANDLE_TYPE_SHARED:
599    case PIPE_RESOURCE_PARAM_HANDLE_TYPE_KMS:
600    case PIPE_RESOURCE_PARAM_HANDLE_TYPE_FD:
601       memset(&whandle, 0, sizeof(whandle));
602 
603       if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_SHARED)
604          whandle.type = WINSYS_HANDLE_TYPE_SHARED;
605       else if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_KMS)
606          whandle.type = WINSYS_HANDLE_TYPE_KMS;
607       else if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_FD)
608          whandle.type = WINSYS_HANDLE_TYPE_FD;
609 
610       if (!screen->resource_get_handle(screen, context, resource, &whandle, handle_usage))
611          return false;
612 
613       *value = whandle.handle;
614       return true;
615    case PIPE_RESOURCE_PARAM_LAYER_STRIDE:
616       break;
617    }
618    return false;
619 }
620 
si_texture_get_info(struct pipe_screen * screen,struct pipe_resource * resource,unsigned * pstride,unsigned * poffset)621 static void si_texture_get_info(struct pipe_screen *screen, struct pipe_resource *resource,
622                                 unsigned *pstride, unsigned *poffset)
623 {
624    uint64_t value;
625 
626    if (pstride) {
627       si_resource_get_param(screen, NULL, resource, 0, 0, 0, PIPE_RESOURCE_PARAM_STRIDE, 0, &value);
628       *pstride = value;
629    }
630 
631    if (poffset) {
632       si_resource_get_param(screen, NULL, resource, 0, 0, 0, PIPE_RESOURCE_PARAM_OFFSET, 0, &value);
633       *poffset = value;
634    }
635 }
636 
si_texture_get_handle(struct pipe_screen * screen,struct pipe_context * ctx,struct pipe_resource * resource,struct winsys_handle * whandle,unsigned usage)637 static bool si_texture_get_handle(struct pipe_screen *screen, struct pipe_context *ctx,
638                                   struct pipe_resource *resource, struct winsys_handle *whandle,
639                                   unsigned usage)
640 {
641    struct si_screen *sscreen = (struct si_screen *)screen;
642    struct si_context *sctx;
643    struct si_resource *res = si_resource(resource);
644    struct si_texture *tex = (struct si_texture *)resource;
645    bool update_metadata = false;
646    unsigned stride, offset, slice_size;
647    uint64_t modifier = DRM_FORMAT_MOD_INVALID;
648    bool flush = false;
649 
650    ctx = threaded_context_unwrap_sync(ctx);
651    sctx = (struct si_context *)(ctx ? ctx : sscreen->aux_context);
652 
653    if (resource->target != PIPE_BUFFER) {
654       unsigned plane = whandle->plane;
655 
656       /* Individual planes are chained pipe_resource instances. */
657       while (plane && resource->next && !si_texture_is_aux_plane(resource->next)) {
658          resource = resource->next;
659          --plane;
660       }
661 
662       res = si_resource(resource);
663       tex = (struct si_texture *)resource;
664 
665       /* This is not supported now, but it might be required for OpenCL
666        * interop in the future.
667        */
668       if (resource->nr_samples > 1 || tex->is_depth)
669          return false;
670 
671       if (plane) {
672          whandle->offset = ac_surface_get_plane_offset(sscreen->info.chip_class,
673                                                        &tex->surface, plane, 0);
674          whandle->stride = ac_surface_get_plane_stride(sscreen->info.chip_class,
675                                                        &tex->surface, plane);
676          whandle->modifier = tex->surface.modifier;
677          return sscreen->ws->buffer_get_handle(sscreen->ws, res->buf, whandle);
678       }
679 
680       /* Move a suballocated texture into a non-suballocated allocation. */
681       if (sscreen->ws->buffer_is_suballocated(res->buf) || tex->surface.tile_swizzle ||
682           (tex->buffer.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
683            sscreen->info.has_local_buffers)) {
684          assert(!res->b.is_shared);
685          si_reallocate_texture_inplace(sctx, tex, PIPE_BIND_SHARED, false);
686          flush = true;
687          assert(res->b.b.bind & PIPE_BIND_SHARED);
688          assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
689          assert(!(res->flags & RADEON_FLAG_NO_INTERPROCESS_SHARING));
690          assert(tex->surface.tile_swizzle == 0);
691       }
692 
693       /* Since shader image stores don't support DCC on GFX8,
694        * disable it for external clients that want write
695        * access.
696        */
697       if ((usage & PIPE_HANDLE_USAGE_SHADER_WRITE && !tex->is_depth && tex->surface.meta_offset) ||
698           /* Displayable DCC requires an explicit flush. */
699           (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
700            si_displayable_dcc_needs_explicit_flush(tex))) {
701          if (si_texture_disable_dcc(sctx, tex)) {
702             update_metadata = true;
703             /* si_texture_disable_dcc flushes the context */
704             flush = false;
705          }
706       }
707 
708       if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
709           (tex->cmask_buffer || (!tex->is_depth && tex->surface.meta_offset))) {
710          /* Eliminate fast clear (both CMASK and DCC) */
711          bool flushed;
712          si_eliminate_fast_color_clear(sctx, tex, &flushed);
713          /* eliminate_fast_color_clear sometimes flushes the context */
714          if (flushed)
715             flush = false;
716 
717          /* Disable CMASK if flush_resource isn't going
718           * to be called.
719           */
720          if (tex->cmask_buffer)
721             si_texture_discard_cmask(sscreen, tex);
722       }
723 
724       /* Set metadata. */
725       if ((!res->b.is_shared || update_metadata) && whandle->offset == 0)
726          si_set_tex_bo_metadata(sscreen, tex);
727 
728       if (sscreen->info.chip_class >= GFX9) {
729          slice_size = tex->surface.u.gfx9.surf_slice_size;
730       } else {
731          slice_size = (uint64_t)tex->surface.u.legacy.level[0].slice_size_dw * 4;
732       }
733 
734       modifier = tex->surface.modifier;
735    } else {
736       /* Buffer exports are for the OpenCL interop. */
737       /* Move a suballocated buffer into a non-suballocated allocation. */
738       if (sscreen->ws->buffer_is_suballocated(res->buf) ||
739           /* A DMABUF export always fails if the BO is local. */
740           (tex->buffer.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
741            sscreen->info.has_local_buffers)) {
742          assert(!res->b.is_shared);
743 
744          /* Allocate a new buffer with PIPE_BIND_SHARED. */
745          struct pipe_resource templ = res->b.b;
746          templ.bind |= PIPE_BIND_SHARED;
747 
748          struct pipe_resource *newb = screen->resource_create(screen, &templ);
749          if (!newb)
750             return false;
751 
752          /* Copy the old buffer contents to the new one. */
753          struct pipe_box box;
754          u_box_1d(0, newb->width0, &box);
755          sctx->b.resource_copy_region(&sctx->b, newb, 0, 0, 0, 0, &res->b.b, 0, &box);
756          flush = true;
757          /* Move the new buffer storage to the old pipe_resource. */
758          si_replace_buffer_storage(&sctx->b, &res->b.b, newb, 0, 0, 0);
759          pipe_resource_reference(&newb, NULL);
760 
761          assert(res->b.b.bind & PIPE_BIND_SHARED);
762          assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
763       }
764 
765       /* Buffers */
766       slice_size = 0;
767    }
768 
769    si_texture_get_info(screen, resource, &stride, &offset);
770 
771    if (res->b.is_shared) {
772       /* USAGE_EXPLICIT_FLUSH must be cleared if at least one user
773        * doesn't set it.
774        */
775       res->external_usage |= usage & ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
776       if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
777          res->external_usage &= ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
778    } else {
779       res->b.is_shared = true;
780       res->external_usage = usage;
781    }
782 
783    if (flush)
784       sctx->b.flush(&sctx->b, NULL, 0);
785 
786    whandle->stride = stride;
787    whandle->offset = offset + slice_size * whandle->layer;
788    whandle->modifier = modifier;
789 
790    return sscreen->ws->buffer_get_handle(sscreen->ws, res->buf, whandle);
791 }
792 
si_print_texture_info(struct si_screen * sscreen,struct si_texture * tex,struct u_log_context * log)793 void si_print_texture_info(struct si_screen *sscreen, struct si_texture *tex,
794                            struct u_log_context *log)
795 {
796    int i;
797    FILE *f;
798    char *surf_info = NULL;
799    size_t surf_info_size;
800 
801    /* Common parameters. */
802    u_log_printf(log,
803                 "  Info: npix_x=%u, npix_y=%u, npix_z=%u, "
804                 "array_size=%u, last_level=%u, nsamples=%u",
805                 tex->buffer.b.b.width0, tex->buffer.b.b.height0,
806                 tex->buffer.b.b.depth0, tex->buffer.b.b.array_size,
807                 tex->buffer.b.b.last_level, tex->buffer.b.b.nr_samples);
808 
809    if (tex->is_depth && tex->surface.meta_offset)
810       u_log_printf(log, ", tc_compatible_htile=%u", tex->tc_compatible_htile);
811 
812    u_log_printf(log, ", %s\n",
813                 util_format_short_name(tex->buffer.b.b.format));
814 
815    f = open_memstream(&surf_info, &surf_info_size);
816    if (!f)
817       return;
818    ac_surface_print_info(f, &sscreen->info, &tex->surface);
819    fclose(f);
820    u_log_printf(log, "%s", surf_info);
821    free(surf_info);
822 
823    if (sscreen->info.chip_class >= GFX9) {
824       return;
825    }
826 
827    if (!tex->is_depth && tex->surface.meta_offset) {
828       for (i = 0; i <= tex->buffer.b.b.last_level; i++)
829          u_log_printf(log,
830                       "    DCCLevel[%i]: enabled=%u, offset=%u, "
831                       "fast_clear_size=%u\n",
832                       i, i < tex->surface.num_meta_levels, tex->surface.u.legacy.color.dcc_level[i].dcc_offset,
833                       tex->surface.u.legacy.color.dcc_level[i].dcc_fast_clear_size);
834    }
835 
836    for (i = 0; i <= tex->buffer.b.b.last_level; i++)
837       u_log_printf(log,
838                    "    Level[%i]: offset=%" PRIu64 ", slice_size=%" PRIu64 ", "
839                    "npix_x=%u, npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
840                    "mode=%u, tiling_index = %u\n",
841                    i, (uint64_t)tex->surface.u.legacy.level[i].offset_256B * 256,
842                    (uint64_t)tex->surface.u.legacy.level[i].slice_size_dw * 4,
843                    u_minify(tex->buffer.b.b.width0, i), u_minify(tex->buffer.b.b.height0, i),
844                    u_minify(tex->buffer.b.b.depth0, i), tex->surface.u.legacy.level[i].nblk_x,
845                    tex->surface.u.legacy.level[i].nblk_y, tex->surface.u.legacy.level[i].mode,
846                    tex->surface.u.legacy.tiling_index[i]);
847 
848    if (tex->surface.has_stencil) {
849       for (i = 0; i <= tex->buffer.b.b.last_level; i++) {
850          u_log_printf(log,
851                       "    StencilLevel[%i]: offset=%" PRIu64 ", "
852                       "slice_size=%" PRIu64 ", npix_x=%u, "
853                       "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
854                       "mode=%u, tiling_index = %u\n",
855                       i, (uint64_t)tex->surface.u.legacy.zs.stencil_level[i].offset_256B * 256,
856                       (uint64_t)tex->surface.u.legacy.zs.stencil_level[i].slice_size_dw * 4,
857                       u_minify(tex->buffer.b.b.width0, i), u_minify(tex->buffer.b.b.height0, i),
858                       u_minify(tex->buffer.b.b.depth0, i),
859                       tex->surface.u.legacy.zs.stencil_level[i].nblk_x,
860                       tex->surface.u.legacy.zs.stencil_level[i].nblk_y,
861                       tex->surface.u.legacy.zs.stencil_level[i].mode,
862                       tex->surface.u.legacy.zs.stencil_tiling_index[i]);
863       }
864    }
865 }
866 
867 /**
868  * Common function for si_texture_create and si_texture_from_handle.
869  *
870  * \param screen	screen
871  * \param base		resource template
872  * \param surface	radeon_surf
873  * \param plane0	if a non-zero plane is being created, this is the first plane
874  * \param imported_buf	from si_texture_from_handle
875  * \param offset	offset for non-zero planes or imported buffers
876  * \param alloc_size	the size to allocate if plane0 != NULL
877  * \param alignment	alignment for the allocation
878  */
si_texture_create_object(struct pipe_screen * screen,const struct pipe_resource * base,const struct radeon_surf * surface,const struct si_texture * plane0,struct pb_buffer * imported_buf,uint64_t offset,unsigned pitch_in_bytes,uint64_t alloc_size,unsigned alignment)879 static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
880                                                    const struct pipe_resource *base,
881                                                    const struct radeon_surf *surface,
882                                                    const struct si_texture *plane0,
883                                                    struct pb_buffer *imported_buf,
884                                                    uint64_t offset, unsigned pitch_in_bytes,
885                                                    uint64_t alloc_size, unsigned alignment)
886 {
887    struct si_texture *tex;
888    struct si_resource *resource;
889    struct si_screen *sscreen = (struct si_screen *)screen;
890 
891    if (!sscreen->info.has_3d_cube_border_color_mipmap &&
892        (base->last_level > 0 ||
893         base->target == PIPE_TEXTURE_3D ||
894         base->target == PIPE_TEXTURE_CUBE)) {
895       assert(0);
896       return NULL;
897    }
898 
899    tex = CALLOC_STRUCT_CL(si_texture);
900    if (!tex)
901       goto error;
902 
903    resource = &tex->buffer;
904    resource->b.b = *base;
905    pipe_reference_init(&resource->b.b.reference, 1);
906    resource->b.b.screen = screen;
907 
908    /* don't include stencil-only formats which we don't support for rendering */
909    tex->is_depth = util_format_has_depth(util_format_description(tex->buffer.b.b.format));
910    tex->surface = *surface;
911 
912    /* Use 1.0 as the default clear value to get optimal ZRANGE_PRECISION if we don't
913     * get a fast clear.
914     */
915    for (unsigned i = 0; i < ARRAY_SIZE(tex->depth_clear_value); i++)
916       tex->depth_clear_value[i] = 1.0;
917 
918    /* On GFX8, HTILE uses different tiling depending on the TC_COMPATIBLE_HTILE
919     * setting, so we have to enable it if we enabled it at allocation.
920     *
921     * GFX9 and later use the same tiling for both, so TC-compatible HTILE can be
922     * enabled on demand.
923     */
924    tex->tc_compatible_htile = (sscreen->info.chip_class == GFX8 &&
925                                tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) ||
926                               /* Mipmapping always starts TC-compatible. */
927                               (sscreen->info.chip_class >= GFX8 &&
928                                tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE &&
929                                tex->buffer.b.b.last_level > 0);
930 
931    /* TC-compatible HTILE:
932     * - GFX8 only supports Z32_FLOAT.
933     * - GFX9 only supports Z32_FLOAT and Z16_UNORM. */
934    if (tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) {
935       if (sscreen->info.chip_class >= GFX9 && base->format == PIPE_FORMAT_Z16_UNORM)
936          tex->db_render_format = base->format;
937       else {
938          tex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
939          tex->upgraded_depth = base->format != PIPE_FORMAT_Z32_FLOAT &&
940                                base->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT;
941       }
942    } else {
943       tex->db_render_format = base->format;
944    }
945 
946    /* Applies to GCN. */
947    tex->last_msaa_resolve_target_micro_mode = tex->surface.micro_tile_mode;
948 
949    if (!ac_surface_override_offset_stride(&sscreen->info, &tex->surface,
950                                      tex->buffer.b.b.last_level + 1,
951                                           offset, pitch_in_bytes / tex->surface.bpe))
952       goto error;
953 
954    if (tex->is_depth) {
955       tex->htile_stencil_disabled = !tex->surface.has_stencil;
956 
957       if (sscreen->info.chip_class >= GFX9) {
958          tex->can_sample_z = true;
959          tex->can_sample_s = true;
960 
961          /* Stencil texturing with HTILE doesn't work
962           * with mipmapping on Navi10-14. */
963          if (sscreen->info.chip_class == GFX10 && base->last_level > 0)
964             tex->htile_stencil_disabled = true;
965       } else {
966          tex->can_sample_z = !tex->surface.u.legacy.depth_adjusted;
967          tex->can_sample_s = !tex->surface.u.legacy.stencil_adjusted;
968 
969          /* GFX8 must keep stencil enabled because it can't use Z-only TC-compatible
970           * HTILE because of a hw bug. This has only a small effect on performance
971           * because we lose a little bit of Z precision in order to make space for
972           * stencil in HTILE.
973           */
974          if (sscreen->info.chip_class == GFX8 &&
975              tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE)
976             tex->htile_stencil_disabled = false;
977       }
978 
979       tex->db_compatible = surface->flags & RADEON_SURF_ZBUFFER;
980    } else {
981       if (tex->surface.cmask_offset) {
982          tex->cb_color_info |= S_028C70_FAST_CLEAR(1);
983          tex->cmask_buffer = &tex->buffer;
984       }
985    }
986 
987    if (plane0) {
988       /* The buffer is shared with the first plane. */
989       resource->bo_size = plane0->buffer.bo_size;
990       resource->bo_alignment_log2 = plane0->buffer.bo_alignment_log2;
991       resource->flags = plane0->buffer.flags;
992       resource->domains = plane0->buffer.domains;
993       resource->memory_usage_kb = plane0->buffer.memory_usage_kb;
994 
995       radeon_bo_reference(sscreen->ws, &resource->buf, plane0->buffer.buf);
996       resource->gpu_address = plane0->buffer.gpu_address;
997    } else if (!(surface->flags & RADEON_SURF_IMPORTED)) {
998       /* Create the backing buffer. */
999       si_init_resource_fields(sscreen, resource, alloc_size, alignment);
1000 
1001       if (!si_alloc_resource(sscreen, resource))
1002          goto error;
1003    } else {
1004       resource->buf = imported_buf;
1005       resource->gpu_address = sscreen->ws->buffer_get_virtual_address(resource->buf);
1006       resource->bo_size = imported_buf->size;
1007       resource->bo_alignment_log2 = imported_buf->alignment_log2;
1008       resource->domains = sscreen->ws->buffer_get_initial_domain(resource->buf);
1009       resource->memory_usage_kb = MAX2(1, resource->bo_size / 1024);
1010       if (sscreen->ws->buffer_get_flags)
1011          resource->flags = sscreen->ws->buffer_get_flags(resource->buf);
1012    }
1013 
1014    /* Prepare metadata clears.  */
1015    struct si_clear_info clears[4];
1016    unsigned num_clears = 0;
1017 
1018    if (tex->cmask_buffer) {
1019       /* Initialize the cmask to 0xCC (= compressed state). */
1020       assert(num_clears < ARRAY_SIZE(clears));
1021       si_init_buffer_clear(&clears[num_clears++], &tex->cmask_buffer->b.b,
1022                            tex->surface.cmask_offset, tex->surface.cmask_size,
1023                            0xCCCCCCCC);
1024    }
1025    if (tex->is_depth && tex->surface.meta_offset) {
1026       uint32_t clear_value = 0;
1027 
1028       if (sscreen->info.chip_class >= GFX9 || tex->tc_compatible_htile)
1029          clear_value = 0x0000030F;
1030 
1031       assert(num_clears < ARRAY_SIZE(clears));
1032       si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset,
1033                            tex->surface.meta_size, clear_value);
1034    }
1035 
1036    /* Initialize DCC only if the texture is not being imported. */
1037    if (!(surface->flags & RADEON_SURF_IMPORTED) && !tex->is_depth && tex->surface.meta_offset) {
1038       /* Clear DCC to black for all tiles with DCC enabled.
1039        *
1040        * This fixes corruption in 3DMark Slingshot Extreme, which
1041        * uses uninitialized textures, causing corruption.
1042        */
1043       if (tex->surface.num_meta_levels == tex->buffer.b.b.last_level + 1 &&
1044           tex->buffer.b.b.nr_samples <= 2) {
1045          /* Simple case - all tiles have DCC enabled. */
1046          assert(num_clears < ARRAY_SIZE(clears));
1047          si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset,
1048                               tex->surface.meta_size, DCC_CLEAR_COLOR_0000);
1049       } else if (sscreen->info.chip_class >= GFX9) {
1050          /* Clear to uncompressed. Clearing this to black is complicated. */
1051          assert(num_clears < ARRAY_SIZE(clears));
1052          si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset,
1053                               tex->surface.meta_size, DCC_UNCOMPRESSED);
1054       } else {
1055          /* GFX8: Initialize mipmap levels and multisamples separately. */
1056          if (tex->buffer.b.b.nr_samples >= 2) {
1057             /* Clearing this to black is complicated. */
1058             assert(num_clears < ARRAY_SIZE(clears));
1059             si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset,
1060                                  tex->surface.meta_size, DCC_UNCOMPRESSED);
1061          } else {
1062             /* Clear the enabled mipmap levels to black. */
1063             unsigned size = 0;
1064 
1065             for (unsigned i = 0; i < tex->surface.num_meta_levels; i++) {
1066                if (!tex->surface.u.legacy.color.dcc_level[i].dcc_fast_clear_size)
1067                   break;
1068 
1069                size = tex->surface.u.legacy.color.dcc_level[i].dcc_offset +
1070                       tex->surface.u.legacy.color.dcc_level[i].dcc_fast_clear_size;
1071             }
1072 
1073             /* Mipmap levels with DCC. */
1074             if (size) {
1075                assert(num_clears < ARRAY_SIZE(clears));
1076                si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset, size,
1077                                     DCC_CLEAR_COLOR_0000);
1078             }
1079             /* Mipmap levels without DCC. */
1080             if (size != tex->surface.meta_size) {
1081                assert(num_clears < ARRAY_SIZE(clears));
1082                si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset + size,
1083                                     tex->surface.meta_size - size, DCC_UNCOMPRESSED);
1084             }
1085          }
1086       }
1087    }
1088 
1089    /* Initialize displayable DCC that requires the retile blit. */
1090    if (tex->surface.display_dcc_offset && !(surface->flags & RADEON_SURF_IMPORTED)) {
1091       /* Uninitialized DCC can hang the display hw.
1092        * Clear to white to indicate that. */
1093       assert(num_clears < ARRAY_SIZE(clears));
1094       si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.display_dcc_offset,
1095                            tex->surface.u.gfx9.color.display_dcc_size, DCC_CLEAR_COLOR_1111);
1096    }
1097 
1098    /* Execute the clears. */
1099    if (num_clears) {
1100       simple_mtx_lock(&sscreen->aux_context_lock);
1101       si_execute_clears((struct si_context *)sscreen->aux_context,
1102                         clears, num_clears, 0);
1103       sscreen->aux_context->flush(sscreen->aux_context, NULL, 0);
1104       simple_mtx_unlock(&sscreen->aux_context_lock);
1105    }
1106 
1107    /* Initialize the CMASK base register value. */
1108    tex->cmask_base_address_reg = (tex->buffer.gpu_address + tex->surface.cmask_offset) >> 8;
1109 
1110    if (sscreen->debug_flags & DBG(VM)) {
1111       fprintf(stderr,
1112               "VM start=0x%" PRIX64 "  end=0x%" PRIX64
1113               " | Texture %ix%ix%i, %i levels, %i samples, %s\n",
1114               tex->buffer.gpu_address, tex->buffer.gpu_address + tex->buffer.buf->size,
1115               base->width0, base->height0, util_num_layers(base, 0), base->last_level + 1,
1116               base->nr_samples ? base->nr_samples : 1, util_format_short_name(base->format));
1117    }
1118 
1119    if (sscreen->debug_flags & DBG(TEX)) {
1120       puts("Texture:");
1121       struct u_log_context log;
1122       u_log_context_init(&log);
1123       si_print_texture_info(sscreen, tex, &log);
1124       u_log_new_page_print(&log, stdout);
1125       fflush(stdout);
1126       u_log_context_destroy(&log);
1127    }
1128 
1129    return tex;
1130 
1131 error:
1132    FREE_CL(tex);
1133    return NULL;
1134 }
1135 
si_choose_tiling(struct si_screen * sscreen,const struct pipe_resource * templ,bool tc_compatible_htile)1136 static enum radeon_surf_mode si_choose_tiling(struct si_screen *sscreen,
1137                                               const struct pipe_resource *templ,
1138                                               bool tc_compatible_htile)
1139 {
1140    const struct util_format_description *desc = util_format_description(templ->format);
1141    bool force_tiling = templ->flags & SI_RESOURCE_FLAG_FORCE_MSAA_TILING;
1142    bool is_depth_stencil = util_format_is_depth_or_stencil(templ->format) &&
1143                            !(templ->flags & SI_RESOURCE_FLAG_FLUSHED_DEPTH);
1144 
1145    /* MSAA resources must be 2D tiled. */
1146    if (templ->nr_samples > 1)
1147       return RADEON_SURF_MODE_2D;
1148 
1149    /* Transfer resources should be linear. */
1150    if (templ->flags & SI_RESOURCE_FLAG_FORCE_LINEAR)
1151       return RADEON_SURF_MODE_LINEAR_ALIGNED;
1152 
1153    /* Avoid Z/S decompress blits by forcing TC-compatible HTILE on GFX8,
1154     * which requires 2D tiling.
1155     */
1156    if (sscreen->info.chip_class == GFX8 && tc_compatible_htile)
1157       return RADEON_SURF_MODE_2D;
1158 
1159    /* Handle common candidates for the linear mode.
1160     * Compressed textures and DB surfaces must always be tiled.
1161     */
1162    if (!force_tiling && !is_depth_stencil && !util_format_is_compressed(templ->format)) {
1163       if (sscreen->debug_flags & DBG(NO_TILING) ||
1164 	  (templ->bind & PIPE_BIND_SCANOUT && sscreen->debug_flags & DBG(NO_DISPLAY_TILING)))
1165          return RADEON_SURF_MODE_LINEAR_ALIGNED;
1166 
1167       /* Tiling doesn't work with the 422 (SUBSAMPLED) formats. */
1168       if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED)
1169          return RADEON_SURF_MODE_LINEAR_ALIGNED;
1170 
1171       /* Cursors are linear on AMD GCN.
1172        * (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */
1173       if (templ->bind & PIPE_BIND_CURSOR)
1174          return RADEON_SURF_MODE_LINEAR_ALIGNED;
1175 
1176       if (templ->bind & PIPE_BIND_LINEAR)
1177          return RADEON_SURF_MODE_LINEAR_ALIGNED;
1178 
1179       /* Textures with a very small height are recommended to be linear. */
1180       if (templ->target == PIPE_TEXTURE_1D || templ->target == PIPE_TEXTURE_1D_ARRAY ||
1181           /* Only very thin and long 2D textures should benefit from
1182            * linear_aligned. */
1183           templ->height0 <= 2)
1184          return RADEON_SURF_MODE_LINEAR_ALIGNED;
1185 
1186       /* Textures likely to be mapped often. */
1187       if (templ->usage == PIPE_USAGE_STAGING || templ->usage == PIPE_USAGE_STREAM)
1188          return RADEON_SURF_MODE_LINEAR_ALIGNED;
1189    }
1190 
1191    /* Make small textures 1D tiled. */
1192    if (templ->width0 <= 16 || templ->height0 <= 16 || (sscreen->debug_flags & DBG(NO_2D_TILING)))
1193       return RADEON_SURF_MODE_1D;
1194 
1195    /* The allocator will switch to 1D if needed. */
1196    return RADEON_SURF_MODE_2D;
1197 }
1198 
1199 static struct pipe_resource *
si_texture_create_with_modifier(struct pipe_screen * screen,const struct pipe_resource * templ,uint64_t modifier)1200 si_texture_create_with_modifier(struct pipe_screen *screen,
1201                                 const struct pipe_resource *templ,
1202                                 uint64_t modifier)
1203 {
1204    struct si_screen *sscreen = (struct si_screen *)screen;
1205    bool is_zs = util_format_is_depth_or_stencil(templ->format);
1206 
1207    if (templ->nr_samples >= 2) {
1208       /* This is hackish (overwriting the const pipe_resource template),
1209        * but should be harmless and gallium frontends can also see
1210        * the overriden number of samples in the created pipe_resource.
1211        */
1212       if (is_zs && sscreen->eqaa_force_z_samples) {
1213          ((struct pipe_resource *)templ)->nr_samples =
1214             ((struct pipe_resource *)templ)->nr_storage_samples = sscreen->eqaa_force_z_samples;
1215       } else if (!is_zs && sscreen->eqaa_force_color_samples) {
1216          ((struct pipe_resource *)templ)->nr_samples = sscreen->eqaa_force_coverage_samples;
1217          ((struct pipe_resource *)templ)->nr_storage_samples = sscreen->eqaa_force_color_samples;
1218       }
1219    }
1220 
1221    bool is_flushed_depth = templ->flags & SI_RESOURCE_FLAG_FLUSHED_DEPTH ||
1222                            templ->flags & SI_RESOURCE_FLAG_FORCE_LINEAR;
1223    bool tc_compatible_htile =
1224       sscreen->info.chip_class >= GFX8 &&
1225       /* There are issues with TC-compatible HTILE on Tonga (and
1226        * Iceland is the same design), and documented bug workarounds
1227        * don't help. For example, this fails:
1228        *   piglit/bin/tex-miplevel-selection 'texture()' 2DShadow -auto
1229        */
1230       sscreen->info.family != CHIP_TONGA && sscreen->info.family != CHIP_ICELAND &&
1231       (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) &&
1232       !(sscreen->debug_flags & DBG(NO_HYPERZ)) && !is_flushed_depth &&
1233       templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */
1234       is_zs;
1235    enum radeon_surf_mode tile_mode = si_choose_tiling(sscreen, templ, tc_compatible_htile);
1236 
1237    /* This allocates textures with multiple planes like NV12 in 1 buffer. */
1238    enum
1239    {
1240       SI_TEXTURE_MAX_PLANES = 3
1241    };
1242    struct radeon_surf surface[SI_TEXTURE_MAX_PLANES] = {};
1243    struct pipe_resource plane_templ[SI_TEXTURE_MAX_PLANES];
1244    uint64_t plane_offset[SI_TEXTURE_MAX_PLANES] = {};
1245    uint64_t total_size = 0;
1246    unsigned max_alignment = 0;
1247    unsigned num_planes = util_format_get_num_planes(templ->format);
1248    assert(num_planes <= SI_TEXTURE_MAX_PLANES);
1249 
1250    /* Compute texture or plane layouts and offsets. */
1251    for (unsigned i = 0; i < num_planes; i++) {
1252       plane_templ[i] = *templ;
1253       plane_templ[i].format = util_format_get_plane_format(templ->format, i);
1254       plane_templ[i].width0 = util_format_get_plane_width(templ->format, i, templ->width0);
1255       plane_templ[i].height0 = util_format_get_plane_height(templ->format, i, templ->height0);
1256 
1257       /* Multi-plane allocations need PIPE_BIND_SHARED, because we can't
1258        * reallocate the storage to add PIPE_BIND_SHARED, because it's
1259        * shared by 3 pipe_resources.
1260        */
1261       if (num_planes > 1)
1262          plane_templ[i].bind |= PIPE_BIND_SHARED;
1263 
1264       if (si_init_surface(sscreen, &surface[i], &plane_templ[i], tile_mode, modifier,
1265                           false, plane_templ[i].bind & PIPE_BIND_SCANOUT,
1266                           is_flushed_depth, tc_compatible_htile))
1267          return NULL;
1268 
1269       plane_offset[i] = align64(total_size, 1 << surface[i].surf_alignment_log2);
1270       total_size = plane_offset[i] + surface[i].total_size;
1271       max_alignment = MAX2(max_alignment, 1 << surface[i].surf_alignment_log2);
1272    }
1273 
1274    struct si_texture *plane0 = NULL, *last_plane = NULL;
1275 
1276    for (unsigned i = 0; i < num_planes; i++) {
1277       struct si_texture *tex =
1278          si_texture_create_object(screen, &plane_templ[i], &surface[i], plane0, NULL,
1279                                   plane_offset[i], 0, total_size, max_alignment);
1280       if (!tex) {
1281          si_texture_reference(&plane0, NULL);
1282          return NULL;
1283       }
1284 
1285       tex->plane_index = i;
1286       tex->num_planes = num_planes;
1287 
1288       if (!plane0) {
1289          plane0 = last_plane = tex;
1290       } else {
1291          last_plane->buffer.b.b.next = &tex->buffer.b.b;
1292          last_plane = tex;
1293       }
1294    }
1295 
1296    return (struct pipe_resource *)plane0;
1297 }
1298 
si_texture_create(struct pipe_screen * screen,const struct pipe_resource * templ)1299 struct pipe_resource *si_texture_create(struct pipe_screen *screen,
1300                                         const struct pipe_resource *templ)
1301 {
1302    return si_texture_create_with_modifier(screen, templ, DRM_FORMAT_MOD_INVALID);
1303 }
1304 
si_query_dmabuf_modifiers(struct pipe_screen * screen,enum pipe_format format,int max,uint64_t * modifiers,unsigned int * external_only,int * count)1305 static void si_query_dmabuf_modifiers(struct pipe_screen *screen,
1306                                       enum pipe_format format,
1307                                       int max,
1308                                       uint64_t *modifiers,
1309                                       unsigned int *external_only,
1310                                       int *count)
1311 {
1312    struct si_screen *sscreen = (struct si_screen *)screen;
1313 
1314    unsigned ac_mod_count = max;
1315    ac_get_supported_modifiers(&sscreen->info, &(struct ac_modifier_options) {
1316          .dcc = !(sscreen->debug_flags & DBG(NO_DCC)),
1317          /* Do not support DCC with retiling yet. This needs explicit
1318           * resource flushes, but the app has no way to promise doing
1319           * flushes with modifiers. */
1320          .dcc_retile = !(sscreen->debug_flags & DBG(NO_DCC)),
1321       }, format, &ac_mod_count,  max ? modifiers : NULL);
1322    if (max && external_only) {
1323       for (unsigned i = 0; i < ac_mod_count; ++i)
1324          external_only[i] = util_format_is_yuv(format);
1325    }
1326    *count = ac_mod_count;
1327 }
1328 
1329 static bool
si_is_dmabuf_modifier_supported(struct pipe_screen * screen,uint64_t modifier,enum pipe_format format,bool * external_only)1330 si_is_dmabuf_modifier_supported(struct pipe_screen *screen,
1331                                uint64_t modifier,
1332                                enum pipe_format format,
1333                                bool *external_only)
1334 {
1335    int allowed_mod_count;
1336    si_query_dmabuf_modifiers(screen, format, 0, NULL, NULL, &allowed_mod_count);
1337 
1338    uint64_t *allowed_modifiers = (uint64_t *)calloc(allowed_mod_count, sizeof(uint64_t));
1339    if (!allowed_modifiers)
1340       return false;
1341 
1342    unsigned *external_array = NULL;
1343    if (external_only) {
1344       external_array = (unsigned *)calloc(allowed_mod_count, sizeof(unsigned));
1345       if (!external_array) {
1346          free(allowed_modifiers);
1347          return false;
1348       }
1349    }
1350 
1351    si_query_dmabuf_modifiers(screen, format, allowed_mod_count, allowed_modifiers,
1352                             external_array, &allowed_mod_count);
1353 
1354    bool supported = false;
1355    for (int i = 0; i < allowed_mod_count && !supported; ++i) {
1356       if (allowed_modifiers[i] != modifier)
1357          continue;
1358 
1359       supported = true;
1360       if (external_only)
1361          *external_only = external_array[i];
1362    }
1363 
1364    free(allowed_modifiers);
1365    free(external_array);
1366    return supported;
1367 }
1368 
1369 static unsigned
si_get_dmabuf_modifier_planes(struct pipe_screen * pscreen,uint64_t modifier,enum pipe_format format)1370 si_get_dmabuf_modifier_planes(struct pipe_screen *pscreen, uint64_t modifier,
1371                              enum pipe_format format)
1372 {
1373    unsigned planes = util_format_get_num_planes(format);
1374 
1375    if (IS_AMD_FMT_MOD(modifier) && planes == 1) {
1376       if (AMD_FMT_MOD_GET(DCC_RETILE, modifier))
1377          return 3;
1378       else if (AMD_FMT_MOD_GET(DCC, modifier))
1379          return 2;
1380       else
1381          return 1;
1382    }
1383 
1384    return planes;
1385 }
1386 
1387 static bool
si_modifier_supports_resource(struct pipe_screen * screen,uint64_t modifier,const struct pipe_resource * templ)1388 si_modifier_supports_resource(struct pipe_screen *screen,
1389                               uint64_t modifier,
1390                               const struct pipe_resource *templ)
1391 {
1392    struct si_screen *sscreen = (struct si_screen *)screen;
1393    uint32_t max_width, max_height;
1394 
1395    ac_modifier_max_extent(&sscreen->info, modifier, &max_width, &max_height);
1396    return templ->width0 <= max_width && templ->height0 <= max_height;
1397 }
1398 
1399 static struct pipe_resource *
si_texture_create_with_modifiers(struct pipe_screen * screen,const struct pipe_resource * templ,const uint64_t * modifiers,int modifier_count)1400 si_texture_create_with_modifiers(struct pipe_screen *screen,
1401                                  const struct pipe_resource *templ,
1402                                  const uint64_t *modifiers,
1403                                  int modifier_count)
1404 {
1405    /* Buffers with modifiers make zero sense. */
1406    assert(templ->target != PIPE_BUFFER);
1407 
1408    /* Select modifier. */
1409    int allowed_mod_count;
1410    si_query_dmabuf_modifiers(screen, templ->format, 0, NULL, NULL, &allowed_mod_count);
1411 
1412    uint64_t *allowed_modifiers = (uint64_t *)calloc(allowed_mod_count, sizeof(uint64_t));
1413    if (!allowed_modifiers) {
1414       return NULL;
1415    }
1416 
1417    /* This does not take external_only into account. We assume it is the same for all modifiers. */
1418    si_query_dmabuf_modifiers(screen, templ->format, allowed_mod_count, allowed_modifiers, NULL, &allowed_mod_count);
1419 
1420    uint64_t modifier = DRM_FORMAT_MOD_INVALID;
1421 
1422    /* Try to find the first allowed modifier that is in the application provided
1423     * list. We assume that the allowed modifiers are ordered in descending
1424     * preference in the list provided by si_query_dmabuf_modifiers. */
1425    for (int i = 0; i < allowed_mod_count; ++i) {
1426       bool found = false;
1427       for (int j = 0; j < modifier_count && !found; ++j)
1428          if (modifiers[j] == allowed_modifiers[i] && si_modifier_supports_resource(screen, modifiers[j], templ))
1429             found = true;
1430 
1431       if (found) {
1432          modifier = allowed_modifiers[i];
1433          break;
1434       }
1435    }
1436 
1437    free(allowed_modifiers);
1438 
1439    if (modifier == DRM_FORMAT_MOD_INVALID) {
1440       return NULL;
1441    }
1442    return si_texture_create_with_modifier(screen, templ, modifier);
1443 }
1444 
si_texture_is_aux_plane(const struct pipe_resource * resource)1445 static bool si_texture_is_aux_plane(const struct pipe_resource *resource)
1446 {
1447    return resource->flags & SI_RESOURCE_AUX_PLANE;
1448 }
1449 
si_texture_from_winsys_buffer(struct si_screen * sscreen,const struct pipe_resource * templ,struct pb_buffer * buf,unsigned stride,uint64_t offset,uint64_t modifier,unsigned usage,bool dedicated)1450 static struct pipe_resource *si_texture_from_winsys_buffer(struct si_screen *sscreen,
1451                                                            const struct pipe_resource *templ,
1452                                                            struct pb_buffer *buf, unsigned stride,
1453                                                            uint64_t offset, uint64_t modifier,
1454                                                            unsigned usage, bool dedicated)
1455 {
1456    struct radeon_surf surface = {};
1457    struct radeon_bo_metadata metadata = {};
1458    struct si_texture *tex;
1459    int r;
1460 
1461    /* Ignore metadata for non-zero planes. */
1462    if (offset != 0)
1463       dedicated = false;
1464 
1465    if (dedicated) {
1466       sscreen->ws->buffer_get_metadata(sscreen->ws, buf, &metadata, &surface);
1467    } else {
1468       /**
1469        * The bo metadata is unset for un-dedicated images. So we fall
1470        * back to linear. See answer to question 5 of the
1471        * VK_KHX_external_memory spec for some details.
1472        *
1473        * It is possible that this case isn't going to work if the
1474        * surface pitch isn't correctly aligned by default.
1475        *
1476        * In order to support it correctly we require multi-image
1477        * metadata to be synchronized between radv and radeonsi. The
1478        * semantics of associating multiple image metadata to a memory
1479        * object on the vulkan export side are not concretely defined
1480        * either.
1481        *
1482        * All the use cases we are aware of at the moment for memory
1483        * objects use dedicated allocations. So lets keep the initial
1484        * implementation simple.
1485        *
1486        * A possible alternative is to attempt to reconstruct the
1487        * tiling information when the TexParameter TEXTURE_TILING_EXT
1488        * is set.
1489        */
1490       metadata.mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
1491    }
1492 
1493    r = si_init_surface(sscreen, &surface, templ, metadata.mode, modifier, true,
1494                        surface.flags & RADEON_SURF_SCANOUT, false, false);
1495    if (r)
1496       return NULL;
1497 
1498    tex = si_texture_create_object(&sscreen->b, templ, &surface, NULL, buf,
1499                                   offset, stride, 0, 0);
1500    if (!tex)
1501       return NULL;
1502 
1503    tex->buffer.b.is_shared = true;
1504    tex->buffer.external_usage = usage;
1505    tex->num_planes = 1;
1506    if (tex->buffer.flags & RADEON_FLAG_ENCRYPTED)
1507       tex->buffer.b.b.bind |= PIPE_BIND_PROTECTED;
1508 
1509    /* Account for multiple planes with lowered yuv import. */
1510    struct pipe_resource *next_plane = tex->buffer.b.b.next;
1511    while (next_plane && !si_texture_is_aux_plane(next_plane)) {
1512       struct si_texture *next_tex = (struct si_texture *)next_plane;
1513       ++next_tex->num_planes;
1514       ++tex->num_planes;
1515       next_plane = next_plane->next;
1516    }
1517 
1518    unsigned nplanes = ac_surface_get_nplanes(&tex->surface);
1519    unsigned plane = 1;
1520    while (next_plane) {
1521       struct si_auxiliary_texture *ptex = (struct si_auxiliary_texture *)next_plane;
1522       if (plane >= nplanes || ptex->buffer != tex->buffer.buf ||
1523           ptex->offset != ac_surface_get_plane_offset(sscreen->info.chip_class,
1524                                                       &tex->surface, plane, 0) ||
1525           ptex->stride != ac_surface_get_plane_stride(sscreen->info.chip_class,
1526                                                       &tex->surface, plane)) {
1527          si_texture_reference(&tex, NULL);
1528          return NULL;
1529       }
1530       ++plane;
1531       next_plane = next_plane->next;
1532    }
1533 
1534    if (plane != nplanes && tex->num_planes == 1) {
1535       si_texture_reference(&tex, NULL);
1536       return NULL;
1537    }
1538 
1539    if (!ac_surface_set_umd_metadata(&sscreen->info, &tex->surface,
1540                                     tex->buffer.b.b.nr_storage_samples,
1541                                     tex->buffer.b.b.last_level + 1,
1542                                     metadata.size_metadata,
1543                                     metadata.metadata)) {
1544       si_texture_reference(&tex, NULL);
1545       return NULL;
1546    }
1547 
1548    if (ac_surface_get_plane_offset(sscreen->info.chip_class, &tex->surface, 0, 0) +
1549         tex->surface.total_size > buf->size ||
1550        buf->alignment_log2 < tex->surface.alignment_log2) {
1551       si_texture_reference(&tex, NULL);
1552       return NULL;
1553    }
1554 
1555    /* Displayable DCC requires an explicit flush. */
1556    if (dedicated && offset == 0 && !(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
1557        si_displayable_dcc_needs_explicit_flush(tex)) {
1558       /* TODO: do we need to decompress DCC? */
1559       if (si_texture_discard_dcc(sscreen, tex)) {
1560          /* Update BO metadata after disabling DCC. */
1561          si_set_tex_bo_metadata(sscreen, tex);
1562       }
1563    }
1564 
1565    assert(tex->surface.tile_swizzle == 0);
1566    return &tex->buffer.b.b;
1567 }
1568 
si_texture_from_handle(struct pipe_screen * screen,const struct pipe_resource * templ,struct winsys_handle * whandle,unsigned usage)1569 static struct pipe_resource *si_texture_from_handle(struct pipe_screen *screen,
1570                                                     const struct pipe_resource *templ,
1571                                                     struct winsys_handle *whandle, unsigned usage)
1572 {
1573    struct si_screen *sscreen = (struct si_screen *)screen;
1574    struct pb_buffer *buf = NULL;
1575 
1576    /* Support only 2D textures without mipmaps */
1577    if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT &&
1578         templ->target != PIPE_TEXTURE_2D_ARRAY) ||
1579        templ->last_level != 0)
1580       return NULL;
1581 
1582    buf = sscreen->ws->buffer_from_handle(sscreen->ws, whandle, sscreen->info.max_alignment);
1583    if (!buf)
1584       return NULL;
1585 
1586    if (whandle->plane >= util_format_get_num_planes(whandle->format)) {
1587       struct si_auxiliary_texture *tex = CALLOC_STRUCT_CL(si_auxiliary_texture);
1588       if (!tex)
1589          return NULL;
1590       tex->b.b = *templ;
1591       tex->b.b.flags |= SI_RESOURCE_AUX_PLANE;
1592       tex->stride = whandle->stride;
1593       tex->offset = whandle->offset;
1594       tex->buffer = buf;
1595       pipe_reference_init(&tex->b.b.reference, 1);
1596       tex->b.b.screen = screen;
1597 
1598       return &tex->b.b;
1599    }
1600 
1601    return si_texture_from_winsys_buffer(sscreen, templ, buf, whandle->stride, whandle->offset,
1602                                         whandle->modifier, usage, true);
1603 }
1604 
si_init_flushed_depth_texture(struct pipe_context * ctx,struct pipe_resource * texture)1605 bool si_init_flushed_depth_texture(struct pipe_context *ctx, struct pipe_resource *texture)
1606 {
1607    struct si_texture *tex = (struct si_texture *)texture;
1608    struct pipe_resource resource;
1609    enum pipe_format pipe_format = texture->format;
1610 
1611    assert(!tex->flushed_depth_texture);
1612 
1613    if (!tex->can_sample_z && tex->can_sample_s) {
1614       switch (pipe_format) {
1615       case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1616          /* Save memory by not allocating the S plane. */
1617          pipe_format = PIPE_FORMAT_Z32_FLOAT;
1618          break;
1619       case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1620       case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1621          /* Save memory bandwidth by not copying the
1622           * stencil part during flush.
1623           *
1624           * This potentially increases memory bandwidth
1625           * if an application uses both Z and S texturing
1626           * simultaneously (a flushed Z24S8 texture
1627           * would be stored compactly), but how often
1628           * does that really happen?
1629           */
1630          pipe_format = PIPE_FORMAT_Z24X8_UNORM;
1631          break;
1632       default:;
1633       }
1634    } else if (!tex->can_sample_s && tex->can_sample_z) {
1635       assert(util_format_has_stencil(util_format_description(pipe_format)));
1636 
1637       /* DB->CB copies to an 8bpp surface don't work. */
1638       pipe_format = PIPE_FORMAT_X24S8_UINT;
1639    }
1640 
1641    memset(&resource, 0, sizeof(resource));
1642    resource.target = texture->target;
1643    resource.format = pipe_format;
1644    resource.width0 = texture->width0;
1645    resource.height0 = texture->height0;
1646    resource.depth0 = texture->depth0;
1647    resource.array_size = texture->array_size;
1648    resource.last_level = texture->last_level;
1649    resource.nr_samples = texture->nr_samples;
1650    resource.usage = PIPE_USAGE_DEFAULT;
1651    resource.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL;
1652    resource.flags = texture->flags | SI_RESOURCE_FLAG_FLUSHED_DEPTH;
1653 
1654    tex->flushed_depth_texture =
1655       (struct si_texture *)ctx->screen->resource_create(ctx->screen, &resource);
1656    if (!tex->flushed_depth_texture) {
1657       PRINT_ERR("failed to create temporary texture to hold flushed depth\n");
1658       return false;
1659    }
1660    return true;
1661 }
1662 
1663 /**
1664  * Initialize the pipe_resource descriptor to be of the same size as the box,
1665  * which is supposed to hold a subregion of the texture "orig" at the given
1666  * mipmap level.
1667  */
si_init_temp_resource_from_box(struct pipe_resource * res,struct pipe_resource * orig,const struct pipe_box * box,unsigned level,unsigned usage,unsigned flags)1668 static void si_init_temp_resource_from_box(struct pipe_resource *res, struct pipe_resource *orig,
1669                                            const struct pipe_box *box, unsigned level,
1670                                            unsigned usage, unsigned flags)
1671 {
1672    memset(res, 0, sizeof(*res));
1673    res->format = orig->format;
1674    res->width0 = box->width;
1675    res->height0 = box->height;
1676    res->depth0 = 1;
1677    res->array_size = 1;
1678    res->usage = usage;
1679    res->flags = flags;
1680 
1681    if (flags & SI_RESOURCE_FLAG_FORCE_LINEAR && util_format_is_compressed(orig->format)) {
1682       /* Transfer resources are allocated with linear tiling, which is
1683        * not supported for compressed formats.
1684        */
1685       unsigned blocksize = util_format_get_blocksize(orig->format);
1686 
1687       if (blocksize == 8) {
1688          res->format = PIPE_FORMAT_R16G16B16A16_UINT;
1689       } else {
1690          assert(blocksize == 16);
1691          res->format = PIPE_FORMAT_R32G32B32A32_UINT;
1692       }
1693 
1694       res->width0 = util_format_get_nblocksx(orig->format, box->width);
1695       res->height0 = util_format_get_nblocksy(orig->format, box->height);
1696    }
1697 
1698    /* We must set the correct texture target and dimensions for a 3D box. */
1699    if (box->depth > 1 && util_max_layer(orig, level) > 0) {
1700       res->target = PIPE_TEXTURE_2D_ARRAY;
1701       res->array_size = box->depth;
1702    } else {
1703       res->target = PIPE_TEXTURE_2D;
1704    }
1705 }
1706 
si_can_invalidate_texture(struct si_screen * sscreen,struct si_texture * tex,unsigned transfer_usage,const struct pipe_box * box)1707 static bool si_can_invalidate_texture(struct si_screen *sscreen, struct si_texture *tex,
1708                                       unsigned transfer_usage, const struct pipe_box *box)
1709 {
1710    return !tex->buffer.b.is_shared && !(tex->surface.flags & RADEON_SURF_IMPORTED) &&
1711           !(transfer_usage & PIPE_MAP_READ) && tex->buffer.b.b.last_level == 0 &&
1712           util_texrange_covers_whole_level(&tex->buffer.b.b, 0, box->x, box->y, box->z, box->width,
1713                                            box->height, box->depth);
1714 }
1715 
si_texture_invalidate_storage(struct si_context * sctx,struct si_texture * tex)1716 static void si_texture_invalidate_storage(struct si_context *sctx, struct si_texture *tex)
1717 {
1718    struct si_screen *sscreen = sctx->screen;
1719 
1720    /* There is no point in discarding depth and tiled buffers. */
1721    assert(!tex->is_depth);
1722    assert(tex->surface.is_linear);
1723 
1724    /* Reallocate the buffer in the same pipe_resource. */
1725    si_alloc_resource(sscreen, &tex->buffer);
1726 
1727    /* Initialize the CMASK base address (needed even without CMASK). */
1728    tex->cmask_base_address_reg = (tex->buffer.gpu_address + tex->surface.cmask_offset) >> 8;
1729 
1730    p_atomic_inc(&sscreen->dirty_tex_counter);
1731 
1732    sctx->num_alloc_tex_transfer_bytes += tex->surface.total_size;
1733 }
1734 
si_texture_transfer_map(struct pipe_context * ctx,struct pipe_resource * texture,unsigned level,unsigned usage,const struct pipe_box * box,struct pipe_transfer ** ptransfer)1735 static void *si_texture_transfer_map(struct pipe_context *ctx, struct pipe_resource *texture,
1736                                      unsigned level, unsigned usage, const struct pipe_box *box,
1737                                      struct pipe_transfer **ptransfer)
1738 {
1739    struct si_context *sctx = (struct si_context *)ctx;
1740    struct si_texture *tex = (struct si_texture *)texture;
1741    struct si_transfer *trans;
1742    struct si_resource *buf;
1743    unsigned offset = 0;
1744    char *map;
1745    bool use_staging_texture = tex->buffer.flags & RADEON_FLAG_ENCRYPTED;
1746 
1747    assert(!(texture->flags & SI_RESOURCE_FLAG_FORCE_LINEAR));
1748    assert(box->width && box->height && box->depth);
1749 
1750    if (tex->buffer.b.b.flags & SI_RESOURCE_AUX_PLANE)
1751       return NULL;
1752 
1753    if ((tex->buffer.flags & RADEON_FLAG_ENCRYPTED) && usage & PIPE_MAP_READ)
1754       return NULL;
1755 
1756    if (tex->is_depth) {
1757       /* Depth textures use staging unconditionally. */
1758       use_staging_texture = true;
1759    } else {
1760       /* Degrade the tile mode if we get too many transfers on APUs.
1761        * On dGPUs, the staging texture is always faster.
1762        * Only count uploads that are at least 4x4 pixels large.
1763        */
1764       if (!sctx->screen->info.has_dedicated_vram && level == 0 && box->width >= 4 &&
1765           box->height >= 4 && p_atomic_inc_return(&tex->num_level0_transfers) == 10) {
1766          bool can_invalidate = si_can_invalidate_texture(sctx->screen, tex, usage, box);
1767 
1768          si_reallocate_texture_inplace(sctx, tex, PIPE_BIND_LINEAR, can_invalidate);
1769       }
1770 
1771       /* Tiled textures need to be converted into a linear texture for CPU
1772        * access. The staging texture is always linear and is placed in GART.
1773        *
1774        * dGPU use a staging texture for VRAM, so that we don't map it and
1775        * don't relocate it to GTT.
1776        *
1777        * Reading from VRAM or GTT WC is slow, always use the staging
1778        * texture in this case.
1779        *
1780        * Use the staging texture for uploads if the underlying BO
1781        * is busy.
1782        */
1783       if (!tex->surface.is_linear || (tex->buffer.flags & RADEON_FLAG_ENCRYPTED) ||
1784           (tex->buffer.domains & RADEON_DOMAIN_VRAM && sctx->screen->info.has_dedicated_vram &&
1785            !sctx->screen->info.smart_access_memory))
1786          use_staging_texture = true;
1787       else if (usage & PIPE_MAP_READ)
1788          use_staging_texture =
1789             tex->buffer.domains & RADEON_DOMAIN_VRAM || tex->buffer.flags & RADEON_FLAG_GTT_WC;
1790       /* Write & linear only: */
1791       else if (si_cs_is_buffer_referenced(sctx, tex->buffer.buf, RADEON_USAGE_READWRITE) ||
1792                !sctx->ws->buffer_wait(sctx->ws, tex->buffer.buf, 0, RADEON_USAGE_READWRITE)) {
1793          /* It's busy. */
1794          if (si_can_invalidate_texture(sctx->screen, tex, usage, box))
1795             si_texture_invalidate_storage(sctx, tex);
1796          else
1797             use_staging_texture = true;
1798       }
1799    }
1800 
1801    trans = CALLOC_STRUCT(si_transfer);
1802    if (!trans)
1803       return NULL;
1804    pipe_resource_reference(&trans->b.b.resource, texture);
1805    trans->b.b.level = level;
1806    trans->b.b.usage = usage;
1807    trans->b.b.box = *box;
1808 
1809    if (use_staging_texture) {
1810       struct pipe_resource resource;
1811       struct si_texture *staging;
1812       unsigned bo_usage = usage & PIPE_MAP_READ ? PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
1813       unsigned bo_flags = SI_RESOURCE_FLAG_FORCE_LINEAR | SI_RESOURCE_FLAG_DRIVER_INTERNAL;
1814 
1815       si_init_temp_resource_from_box(&resource, texture, box, level, bo_usage,
1816                                      bo_flags);
1817 
1818       /* Since depth-stencil textures don't support linear tiling,
1819        * blit from ZS to color and vice versa. u_blitter will do
1820        * the packing for these formats.
1821        */
1822       if (tex->is_depth)
1823          resource.format = util_blitter_get_color_format_for_zs(resource.format);
1824 
1825       /* Create the temporary texture. */
1826       staging = (struct si_texture *)ctx->screen->resource_create(ctx->screen, &resource);
1827       if (!staging) {
1828          PRINT_ERR("failed to create temporary texture to hold untiled copy\n");
1829          goto fail_trans;
1830       }
1831       trans->staging = &staging->buffer;
1832 
1833       /* Just get the strides. */
1834       si_texture_get_offset(sctx->screen, staging, 0, NULL, &trans->b.b.stride,
1835                             &trans->b.b.layer_stride);
1836 
1837       if (usage & PIPE_MAP_READ)
1838          si_copy_to_staging_texture(ctx, trans);
1839       else
1840          usage |= PIPE_MAP_UNSYNCHRONIZED;
1841 
1842       buf = trans->staging;
1843    } else {
1844       /* the resource is mapped directly */
1845       offset = si_texture_get_offset(sctx->screen, tex, level, box, &trans->b.b.stride,
1846                                      &trans->b.b.layer_stride);
1847       buf = &tex->buffer;
1848    }
1849 
1850    /* Always unmap texture CPU mappings on 32-bit architectures, so that
1851     * we don't run out of the CPU address space.
1852     */
1853    if (sizeof(void *) == 4)
1854       usage |= RADEON_MAP_TEMPORARY;
1855 
1856    if (!(map = si_buffer_map(sctx, buf, usage)))
1857       goto fail_trans;
1858 
1859    *ptransfer = &trans->b.b;
1860    return map + offset;
1861 
1862 fail_trans:
1863    si_resource_reference(&trans->staging, NULL);
1864    pipe_resource_reference(&trans->b.b.resource, NULL);
1865    FREE(trans);
1866    return NULL;
1867 }
1868 
si_texture_transfer_unmap(struct pipe_context * ctx,struct pipe_transfer * transfer)1869 static void si_texture_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer *transfer)
1870 {
1871    struct si_context *sctx = (struct si_context *)ctx;
1872    struct si_transfer *stransfer = (struct si_transfer *)transfer;
1873    struct pipe_resource *texture = transfer->resource;
1874    struct si_texture *tex = (struct si_texture *)texture;
1875 
1876    /* Always unmap texture CPU mappings on 32-bit architectures, so that
1877     * we don't run out of the CPU address space.
1878     */
1879    if (sizeof(void *) == 4) {
1880       struct si_resource *buf = stransfer->staging ? stransfer->staging : &tex->buffer;
1881 
1882       sctx->ws->buffer_unmap(sctx->ws, buf->buf);
1883    }
1884 
1885    if ((transfer->usage & PIPE_MAP_WRITE) && stransfer->staging)
1886       si_copy_from_staging_texture(ctx, stransfer);
1887 
1888    if (stransfer->staging) {
1889       sctx->num_alloc_tex_transfer_bytes += stransfer->staging->buf->size;
1890       si_resource_reference(&stransfer->staging, NULL);
1891    }
1892 
1893    /* Heuristic for {upload, draw, upload, draw, ..}:
1894     *
1895     * Flush the gfx IB if we've allocated too much texture storage.
1896     *
1897     * The idea is that we don't want to build IBs that use too much
1898     * memory and put pressure on the kernel memory manager and we also
1899     * want to make temporary and invalidated buffers go idle ASAP to
1900     * decrease the total memory usage or make them reusable. The memory
1901     * usage will be slightly higher than given here because of the buffer
1902     * cache in the winsys.
1903     *
1904     * The result is that the kernel memory manager is never a bottleneck.
1905     */
1906    if (sctx->num_alloc_tex_transfer_bytes > sctx->screen->info.gart_size / 4) {
1907       si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
1908       sctx->num_alloc_tex_transfer_bytes = 0;
1909    }
1910 
1911    pipe_resource_reference(&transfer->resource, NULL);
1912    FREE(transfer);
1913 }
1914 
1915 /* Return if it's allowed to reinterpret one format as another with DCC enabled.
1916  */
vi_dcc_formats_compatible(struct si_screen * sscreen,enum pipe_format format1,enum pipe_format format2)1917 bool vi_dcc_formats_compatible(struct si_screen *sscreen, enum pipe_format format1,
1918                                enum pipe_format format2)
1919 {
1920    const struct util_format_description *desc1, *desc2;
1921 
1922    /* No format change - exit early. */
1923    if (format1 == format2)
1924       return true;
1925 
1926    format1 = si_simplify_cb_format(format1);
1927    format2 = si_simplify_cb_format(format2);
1928 
1929    /* Check again after format adjustments. */
1930    if (format1 == format2)
1931       return true;
1932 
1933    desc1 = util_format_description(format1);
1934    desc2 = util_format_description(format2);
1935 
1936    if (desc1->layout != UTIL_FORMAT_LAYOUT_PLAIN || desc2->layout != UTIL_FORMAT_LAYOUT_PLAIN)
1937       return false;
1938 
1939    /* Float and non-float are totally incompatible. */
1940    if ((desc1->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) !=
1941        (desc2->channel[0].type == UTIL_FORMAT_TYPE_FLOAT))
1942       return false;
1943 
1944    /* Channel sizes must match across DCC formats.
1945     * Comparing just the first 2 channels should be enough.
1946     */
1947    if (desc1->channel[0].size != desc2->channel[0].size ||
1948        (desc1->nr_channels >= 2 && desc1->channel[1].size != desc2->channel[1].size))
1949       return false;
1950 
1951    /* Everything below is not needed if the driver never uses the DCC
1952     * clear code with the value of 1.
1953     */
1954 
1955    /* If the clear values are all 1 or all 0, this constraint can be
1956     * ignored. */
1957    if (vi_alpha_is_on_msb(sscreen, format1) != vi_alpha_is_on_msb(sscreen, format2))
1958       return false;
1959 
1960    /* Channel types must match if the clear value of 1 is used.
1961     * The type categories are only float, signed, unsigned.
1962     * NORM and INT are always compatible.
1963     */
1964    if (desc1->channel[0].type != desc2->channel[0].type ||
1965        (desc1->nr_channels >= 2 && desc1->channel[1].type != desc2->channel[1].type))
1966       return false;
1967 
1968    return true;
1969 }
1970 
vi_dcc_formats_are_incompatible(struct pipe_resource * tex,unsigned level,enum pipe_format view_format)1971 bool vi_dcc_formats_are_incompatible(struct pipe_resource *tex, unsigned level,
1972                                      enum pipe_format view_format)
1973 {
1974    struct si_texture *stex = (struct si_texture *)tex;
1975 
1976    return vi_dcc_enabled(stex, level) &&
1977           !vi_dcc_formats_compatible((struct si_screen *)tex->screen, tex->format, view_format);
1978 }
1979 
1980 /* This can't be merged with the above function, because
1981  * vi_dcc_formats_compatible should be called only when DCC is enabled. */
vi_disable_dcc_if_incompatible_format(struct si_context * sctx,struct pipe_resource * tex,unsigned level,enum pipe_format view_format)1982 void vi_disable_dcc_if_incompatible_format(struct si_context *sctx, struct pipe_resource *tex,
1983                                            unsigned level, enum pipe_format view_format)
1984 {
1985    struct si_texture *stex = (struct si_texture *)tex;
1986 
1987    if (vi_dcc_formats_are_incompatible(tex, level, view_format))
1988       if (!si_texture_disable_dcc(sctx, stex))
1989          si_decompress_dcc(sctx, stex);
1990 }
1991 
si_create_surface_custom(struct pipe_context * pipe,struct pipe_resource * texture,const struct pipe_surface * templ,unsigned width0,unsigned height0,unsigned width,unsigned height)1992 struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe,
1993                                               struct pipe_resource *texture,
1994                                               const struct pipe_surface *templ, unsigned width0,
1995                                               unsigned height0, unsigned width, unsigned height)
1996 {
1997    struct si_surface *surface = CALLOC_STRUCT(si_surface);
1998 
1999    if (!surface)
2000       return NULL;
2001 
2002    assert(templ->u.tex.first_layer <= util_max_layer(texture, templ->u.tex.level));
2003    assert(templ->u.tex.last_layer <= util_max_layer(texture, templ->u.tex.level));
2004 
2005    pipe_reference_init(&surface->base.reference, 1);
2006    pipe_resource_reference(&surface->base.texture, texture);
2007    surface->base.context = pipe;
2008    surface->base.format = templ->format;
2009    surface->base.width = width;
2010    surface->base.height = height;
2011    surface->base.u = templ->u;
2012 
2013    surface->width0 = width0;
2014    surface->height0 = height0;
2015 
2016    surface->dcc_incompatible =
2017       texture->target != PIPE_BUFFER &&
2018       vi_dcc_formats_are_incompatible(texture, templ->u.tex.level, templ->format);
2019    return &surface->base;
2020 }
2021 
si_create_surface(struct pipe_context * pipe,struct pipe_resource * tex,const struct pipe_surface * templ)2022 static struct pipe_surface *si_create_surface(struct pipe_context *pipe, struct pipe_resource *tex,
2023                                               const struct pipe_surface *templ)
2024 {
2025    unsigned level = templ->u.tex.level;
2026    unsigned width = u_minify(tex->width0, level);
2027    unsigned height = u_minify(tex->height0, level);
2028    unsigned width0 = tex->width0;
2029    unsigned height0 = tex->height0;
2030 
2031    if (tex->target != PIPE_BUFFER && templ->format != tex->format) {
2032       const struct util_format_description *tex_desc = util_format_description(tex->format);
2033       const struct util_format_description *templ_desc = util_format_description(templ->format);
2034 
2035       assert(tex_desc->block.bits == templ_desc->block.bits);
2036 
2037       /* Adjust size of surface if and only if the block width or
2038        * height is changed. */
2039       if (tex_desc->block.width != templ_desc->block.width ||
2040           tex_desc->block.height != templ_desc->block.height) {
2041          unsigned nblks_x = util_format_get_nblocksx(tex->format, width);
2042          unsigned nblks_y = util_format_get_nblocksy(tex->format, height);
2043 
2044          width = nblks_x * templ_desc->block.width;
2045          height = nblks_y * templ_desc->block.height;
2046 
2047          width0 = util_format_get_nblocksx(tex->format, width0);
2048          height0 = util_format_get_nblocksy(tex->format, height0);
2049       }
2050    }
2051 
2052    return si_create_surface_custom(pipe, tex, templ, width0, height0, width, height);
2053 }
2054 
si_surface_destroy(struct pipe_context * pipe,struct pipe_surface * surface)2055 static void si_surface_destroy(struct pipe_context *pipe, struct pipe_surface *surface)
2056 {
2057    pipe_resource_reference(&surface->texture, NULL);
2058    FREE(surface);
2059 }
2060 
si_translate_colorswap(enum pipe_format format,bool do_endian_swap)2061 unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap)
2062 {
2063    const struct util_format_description *desc = util_format_description(format);
2064 
2065 #define HAS_SWIZZLE(chan, swz) (desc->swizzle[chan] == PIPE_SWIZZLE_##swz)
2066 
2067    if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
2068       return V_028C70_SWAP_STD;
2069 
2070    if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
2071       return ~0U;
2072 
2073    switch (desc->nr_channels) {
2074    case 1:
2075       if (HAS_SWIZZLE(0, X))
2076          return V_028C70_SWAP_STD; /* X___ */
2077       else if (HAS_SWIZZLE(3, X))
2078          return V_028C70_SWAP_ALT_REV; /* ___X */
2079       break;
2080    case 2:
2081       if ((HAS_SWIZZLE(0, X) && HAS_SWIZZLE(1, Y)) || (HAS_SWIZZLE(0, X) && HAS_SWIZZLE(1, NONE)) ||
2082           (HAS_SWIZZLE(0, NONE) && HAS_SWIZZLE(1, Y)))
2083          return V_028C70_SWAP_STD; /* XY__ */
2084       else if ((HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(1, X)) ||
2085                (HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(1, NONE)) ||
2086                (HAS_SWIZZLE(0, NONE) && HAS_SWIZZLE(1, X)))
2087          /* YX__ */
2088          return (do_endian_swap ? V_028C70_SWAP_STD : V_028C70_SWAP_STD_REV);
2089       else if (HAS_SWIZZLE(0, X) && HAS_SWIZZLE(3, Y))
2090          return V_028C70_SWAP_ALT; /* X__Y */
2091       else if (HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(3, X))
2092          return V_028C70_SWAP_ALT_REV; /* Y__X */
2093       break;
2094    case 3:
2095       if (HAS_SWIZZLE(0, X))
2096          return (do_endian_swap ? V_028C70_SWAP_STD_REV : V_028C70_SWAP_STD);
2097       else if (HAS_SWIZZLE(0, Z))
2098          return V_028C70_SWAP_STD_REV; /* ZYX */
2099       break;
2100    case 4:
2101       /* check the middle channels, the 1st and 4th channel can be NONE */
2102       if (HAS_SWIZZLE(1, Y) && HAS_SWIZZLE(2, Z)) {
2103          return V_028C70_SWAP_STD; /* XYZW */
2104       } else if (HAS_SWIZZLE(1, Z) && HAS_SWIZZLE(2, Y)) {
2105          return V_028C70_SWAP_STD_REV; /* WZYX */
2106       } else if (HAS_SWIZZLE(1, Y) && HAS_SWIZZLE(2, X)) {
2107          return V_028C70_SWAP_ALT; /* ZYXW */
2108       } else if (HAS_SWIZZLE(1, Z) && HAS_SWIZZLE(2, W)) {
2109          /* YZWX */
2110          if (desc->is_array)
2111             return V_028C70_SWAP_ALT_REV;
2112          else
2113             return (do_endian_swap ? V_028C70_SWAP_ALT : V_028C70_SWAP_ALT_REV);
2114       }
2115       break;
2116    }
2117    return ~0U;
2118 }
2119 
2120 static struct pipe_memory_object *
si_memobj_from_handle(struct pipe_screen * screen,struct winsys_handle * whandle,bool dedicated)2121 si_memobj_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle, bool dedicated)
2122 {
2123    struct si_screen *sscreen = (struct si_screen *)screen;
2124    struct si_memory_object *memobj = CALLOC_STRUCT(si_memory_object);
2125    struct pb_buffer *buf = NULL;
2126 
2127    if (!memobj)
2128       return NULL;
2129 
2130    buf = sscreen->ws->buffer_from_handle(sscreen->ws, whandle, sscreen->info.max_alignment);
2131    if (!buf) {
2132       free(memobj);
2133       return NULL;
2134    }
2135 
2136    memobj->b.dedicated = dedicated;
2137    memobj->buf = buf;
2138    memobj->stride = whandle->stride;
2139 
2140    return (struct pipe_memory_object *)memobj;
2141 }
2142 
si_memobj_destroy(struct pipe_screen * screen,struct pipe_memory_object * _memobj)2143 static void si_memobj_destroy(struct pipe_screen *screen, struct pipe_memory_object *_memobj)
2144 {
2145    struct si_memory_object *memobj = (struct si_memory_object *)_memobj;
2146 
2147    radeon_bo_reference(((struct si_screen*)screen)->ws, &memobj->buf, NULL);
2148    free(memobj);
2149 }
2150 
si_resource_from_memobj(struct pipe_screen * screen,const struct pipe_resource * templ,struct pipe_memory_object * _memobj,uint64_t offset)2151 static struct pipe_resource *si_resource_from_memobj(struct pipe_screen *screen,
2152                                                     const struct pipe_resource *templ,
2153                                                     struct pipe_memory_object *_memobj,
2154                                                     uint64_t offset)
2155 {
2156    struct si_screen *sscreen = (struct si_screen *)screen;
2157    struct si_memory_object *memobj = (struct si_memory_object *)_memobj;
2158    struct pipe_resource *res;
2159 
2160    if (templ->target == PIPE_BUFFER)
2161       res = si_buffer_from_winsys_buffer(screen, templ, memobj->buf,
2162                                          memobj->b.dedicated);
2163    else
2164       res = si_texture_from_winsys_buffer(sscreen, templ, memobj->buf,
2165                                           memobj->stride,
2166                                           offset, DRM_FORMAT_MOD_INVALID,
2167                                           PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE | PIPE_HANDLE_USAGE_SHADER_WRITE,
2168                                           memobj->b.dedicated);
2169 
2170    if (!res)
2171       return NULL;
2172 
2173    /* si_texture_from_winsys_buffer doesn't increment refcount of
2174     * memobj->buf, so increment it here.
2175     */
2176    struct pb_buffer *buf = NULL;
2177    radeon_bo_reference(sscreen->ws, &buf, memobj->buf);
2178    return res;
2179 }
2180 
si_check_resource_capability(struct pipe_screen * screen,struct pipe_resource * resource,unsigned bind)2181 static bool si_check_resource_capability(struct pipe_screen *screen, struct pipe_resource *resource,
2182                                          unsigned bind)
2183 {
2184    struct si_texture *tex = (struct si_texture *)resource;
2185 
2186    /* Buffers only support the linear flag. */
2187    if (resource->target == PIPE_BUFFER)
2188       return (bind & ~PIPE_BIND_LINEAR) == 0;
2189 
2190    if (bind & PIPE_BIND_LINEAR && !tex->surface.is_linear)
2191       return false;
2192 
2193    if (bind & PIPE_BIND_SCANOUT && !tex->surface.is_displayable)
2194       return false;
2195 
2196    /* TODO: PIPE_BIND_CURSOR - do we care? */
2197    return true;
2198 }
2199 
si_init_screen_texture_functions(struct si_screen * sscreen)2200 void si_init_screen_texture_functions(struct si_screen *sscreen)
2201 {
2202    sscreen->b.resource_from_handle = si_texture_from_handle;
2203    sscreen->b.resource_get_handle = si_texture_get_handle;
2204    sscreen->b.resource_get_param = si_resource_get_param;
2205    sscreen->b.resource_get_info = si_texture_get_info;
2206    sscreen->b.resource_from_memobj = si_resource_from_memobj;
2207    sscreen->b.memobj_create_from_handle = si_memobj_from_handle;
2208    sscreen->b.memobj_destroy = si_memobj_destroy;
2209    sscreen->b.check_resource_capability = si_check_resource_capability;
2210 
2211    /* By not setting it the frontend will fall back to non-modifier create,
2212     * which works around some applications using modifiers that are not
2213     * allowed in combination with lack of error reporting in
2214     * gbm_dri_surface_create */
2215    if (sscreen->info.chip_class >= GFX9 && sscreen->info.kernel_has_modifiers) {
2216       sscreen->b.resource_create_with_modifiers = si_texture_create_with_modifiers;
2217       sscreen->b.query_dmabuf_modifiers = si_query_dmabuf_modifiers;
2218       sscreen->b.is_dmabuf_modifier_supported = si_is_dmabuf_modifier_supported;
2219       sscreen->b.get_dmabuf_modifier_planes = si_get_dmabuf_modifier_planes;
2220    }
2221 }
2222 
si_init_context_texture_functions(struct si_context * sctx)2223 void si_init_context_texture_functions(struct si_context *sctx)
2224 {
2225    sctx->b.texture_map = si_texture_transfer_map;
2226    sctx->b.texture_unmap = si_texture_transfer_unmap;
2227    sctx->b.create_surface = si_create_surface;
2228    sctx->b.surface_destroy = si_surface_destroy;
2229 }
2230