• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3  * Copyright 2015-2021 Advanced Micro Devices, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * on the rights to use, copy, modify, merge, publish, distribute, sub
10  * license, and/or sell copies of the Software, and to permit persons to whom
11  * the Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23  * USE OR OTHER DEALINGS IN THE SOFTWARE.
24  */
25 
26 #include "si_build_pm4.h"
27 #include "sid.h"
28 #include "util/u_memory.h"
29 
30 
31 static
si_prepare_for_sdma_copy(struct si_context * sctx,struct si_texture * dst,struct si_texture * src)32 bool si_prepare_for_sdma_copy(struct si_context *sctx, struct si_texture *dst,struct si_texture *src)
33 {
34    if (dst->surface.bpe != src->surface.bpe)
35       return false;
36 
37    /* MSAA: Blits don't exist in the real world. */
38    if (src->buffer.b.b.nr_samples > 1 || dst->buffer.b.b.nr_samples > 1)
39       return false;
40 
41    if (dst->buffer.b.b.last_level != 0 || src->buffer.b.b.last_level != 0)
42       return false;
43 
44    return true;
45 }
46 
/**
 * Return the width of a mip level expressed in blocks.
 *
 * \param width  base width passed to u_minify()
 * \param level  mip level passed to u_minify()
 * \param blk_w  block width used as the divisor
 * \return the minified width divided by \p blk_w, rounded up.
 */
static unsigned minify_as_blocks(unsigned width, unsigned level, unsigned blk_w)
{
   const unsigned level_width = u_minify(width, level);

   return DIV_ROUND_UP(level_width, blk_w);
}
52 
encode_legacy_tile_info(struct si_context * sctx,struct si_texture * tex)53 static unsigned encode_legacy_tile_info(struct si_context *sctx, struct si_texture *tex)
54 {
55    struct radeon_info *info = &sctx->screen->info;
56    unsigned tile_index = tex->surface.u.legacy.tiling_index[0];
57    unsigned macro_tile_index = tex->surface.u.legacy.macro_tile_index;
58    unsigned tile_mode = info->si_tile_mode_array[tile_index];
59    unsigned macro_tile_mode = info->cik_macrotile_mode_array[macro_tile_index];
60 
61    return util_logbase2(tex->surface.bpe) |
62           (G_009910_ARRAY_MODE(tile_mode) << 3) |
63           (G_009910_MICRO_TILE_MODE_NEW(tile_mode) << 8) |
64           /* Non-depth modes don't have TILE_SPLIT set. */
65           ((util_logbase2(tex->surface.u.legacy.tile_split >> 6)) << 11) |
66           (G_009990_BANK_WIDTH(macro_tile_mode) << 15) |
67           (G_009990_BANK_HEIGHT(macro_tile_mode) << 18) |
68           (G_009990_NUM_BANKS(macro_tile_mode) << 21) |
69           (G_009990_MACRO_TILE_ASPECT(macro_tile_mode) << 24) |
70           (G_009910_PIPE_CONFIG(tile_mode) << 26);
71 }
72 
73 static
si_translate_format_to_hw(struct si_context * sctx,enum pipe_format format,unsigned * hw_fmt,unsigned * hw_type)74 bool si_translate_format_to_hw(struct si_context *sctx, enum pipe_format format, unsigned *hw_fmt, unsigned *hw_type)
75 {
76    const struct util_format_description *desc = util_format_description(format);
77    *hw_fmt = si_translate_colorformat(sctx->chip_class, format);
78 
79    int firstchan;
80    for (firstchan = 0; firstchan < 4; firstchan++) {
81       if (desc->channel[firstchan].type != UTIL_FORMAT_TYPE_VOID) {
82          break;
83       }
84    }
85    if (firstchan == 4 || desc->channel[firstchan].type == UTIL_FORMAT_TYPE_FLOAT) {
86       *hw_type = V_028C70_NUMBER_FLOAT;
87    } else {
88       *hw_type = V_028C70_NUMBER_UNORM;
89       if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
90          *hw_type = V_028C70_NUMBER_SRGB;
91       else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_SIGNED) {
92          if (desc->channel[firstchan].pure_integer) {
93             *hw_type = V_028C70_NUMBER_SINT;
94          } else {
95             assert(desc->channel[firstchan].normalized);
96             *hw_type = V_028C70_NUMBER_SNORM;
97          }
98       } else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_UNSIGNED) {
99          if (desc->channel[firstchan].pure_integer) {
100             *hw_type = V_028C70_NUMBER_UINT;
101          } else {
102             assert(desc->channel[firstchan].normalized);
103             *hw_type = V_028C70_NUMBER_UNORM;
104          }
105       } else {
106          return false;
107       }
108    }
109    return true;
110 }
111 
112 static
si_sdma_v4_v5_copy_texture(struct si_context * sctx,struct si_texture * sdst,struct si_texture * ssrc,bool is_v5)113 bool si_sdma_v4_v5_copy_texture(struct si_context *sctx, struct si_texture *sdst, struct si_texture *ssrc, bool is_v5)
114 {
115    unsigned bpp = sdst->surface.bpe;
116    uint64_t dst_address = sdst->buffer.gpu_address + sdst->surface.u.gfx9.surf_offset;
117    uint64_t src_address = ssrc->buffer.gpu_address + ssrc->surface.u.gfx9.surf_offset;
118    unsigned dst_pitch = sdst->surface.u.gfx9.surf_pitch;
119    unsigned src_pitch = ssrc->surface.u.gfx9.surf_pitch;
120    unsigned copy_width = DIV_ROUND_UP(ssrc->buffer.b.b.width0, ssrc->surface.blk_w);
121    unsigned copy_height = DIV_ROUND_UP(ssrc->buffer.b.b.height0, ssrc->surface.blk_h);
122 
123    bool tmz = (ssrc->buffer.flags & RADEON_FLAG_ENCRYPTED);
124    assert (!tmz || (sdst->buffer.flags & RADEON_FLAG_ENCRYPTED));
125 
126    /* Linear -> linear sub-window copy. */
127    if (ssrc->surface.is_linear && sdst->surface.is_linear) {
128       struct radeon_cmdbuf *cs = sctx->sdma_cs;
129 
130       unsigned bytes = src_pitch * copy_height * bpp;
131 
132       if (!(bytes < (1u << 22)))
133          return false;
134 
135       src_address += ssrc->surface.u.gfx9.offset[0];
136       dst_address += sdst->surface.u.gfx9.offset[0];
137 
138       radeon_begin(cs);
139       radeon_emit(CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
140                                   CIK_SDMA_COPY_SUB_OPCODE_LINEAR,
141                                   (tmz ? 4 : 0)));
142       radeon_emit(bytes);
143       radeon_emit(0);
144       radeon_emit(src_address);
145       radeon_emit(src_address >> 32);
146       radeon_emit(dst_address);
147       radeon_emit(dst_address >> 32);
148       radeon_end();
149       return true;
150    }
151 
152    /* Linear <-> Tiled sub-window copy */
153    if (ssrc->surface.is_linear != sdst->surface.is_linear) {
154       struct si_texture *tiled = ssrc->surface.is_linear ? sdst : ssrc;
155       struct si_texture *linear = tiled == ssrc ? sdst : ssrc;
156       unsigned tiled_width = DIV_ROUND_UP(tiled->buffer.b.b.width0, tiled->surface.blk_w);
157       unsigned tiled_height = DIV_ROUND_UP(tiled->buffer.b.b.height0, tiled->surface.blk_h);
158       unsigned linear_pitch = linear == ssrc ? src_pitch : dst_pitch;
159       unsigned linear_slice_pitch = ((uint64_t)linear->surface.u.gfx9.surf_slice_size) / bpp;
160       uint64_t tiled_address = tiled == ssrc ? src_address : dst_address;
161       uint64_t linear_address = linear == ssrc ? src_address : dst_address;
162       struct radeon_cmdbuf *cs = sctx->sdma_cs;
163       /* Only SDMA 5 supports DCC with SDMA */
164       bool dcc = vi_dcc_enabled(tiled, 0) && is_v5;
165       assert(tiled->buffer.b.b.depth0 == 1);
166 
167       linear_address += linear->surface.u.gfx9.offset[0];
168 
169       /* Check if everything fits into the bitfields */
170       if (!(tiled_width < (1 << 14) && tiled_height < (1 << 14) &&
171             linear_pitch < (1 << 14) && linear_slice_pitch < (1 << 28) &&
172             copy_width < (1 << 14) && copy_height < (1 << 14)))
173          return false;
174 
175       radeon_begin(cs);
176       radeon_emit(
177          CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
178                          CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW,
179                          (tmz ? 4 : 0)) |
180          dcc << 19 |
181          (is_v5 ? 0 : tiled->buffer.b.b.last_level) << 20 |
182          (linear == sdst ? 1u : 0) << 31);
183       radeon_emit((uint32_t)tiled_address | (tiled->surface.tile_swizzle << 8));
184       radeon_emit((uint32_t)(tiled_address >> 32));
185       radeon_emit(0);
186       radeon_emit(((tiled_width - 1) << 16));
187       radeon_emit((tiled_height - 1));
188       radeon_emit(util_logbase2(bpp) |
189                   tiled->surface.u.gfx9.swizzle_mode << 3 |
190                   tiled->surface.u.gfx9.resource_type << 9 |
191                   (is_v5 ? tiled->buffer.b.b.last_level : tiled->surface.u.gfx9.epitch) << 16);
192       radeon_emit((uint32_t)linear_address);
193       radeon_emit((uint32_t)(linear_address >> 32));
194       radeon_emit(0);
195       radeon_emit(((linear_pitch - 1) << 16));
196       radeon_emit(linear_slice_pitch - 1);
197       radeon_emit((copy_width - 1) | ((copy_height - 1) << 16));
198       radeon_emit(0);
199 
200       if (dcc) {
201          unsigned hw_fmt, hw_type;
202          uint64_t md_address = tiled_address + tiled->surface.meta_offset;
203 
204          si_translate_format_to_hw(sctx, tiled->buffer.b.b.format, &hw_fmt, &hw_type);
205 
206          /* Add metadata */
207          radeon_emit((uint32_t)md_address);
208          radeon_emit((uint32_t)(md_address >> 32));
209          radeon_emit(hw_fmt |
210                      vi_alpha_is_on_msb(sctx->screen, tiled->buffer.b.b.format) << 8 |
211                      hw_type << 9 |
212                      tiled->surface.u.gfx9.color.dcc.max_compressed_block_size << 24 |
213                      V_028C78_MAX_BLOCK_SIZE_256B << 26 |
214                      tmz << 29 |
215                      tiled->surface.u.gfx9.color.dcc.pipe_aligned << 31);
216       }
217       radeon_end();
218       return true;
219    }
220 
221    return false;
222 }
223 
224 static
cik_sdma_copy_texture(struct si_context * sctx,struct si_texture * sdst,struct si_texture * ssrc)225 bool cik_sdma_copy_texture(struct si_context *sctx, struct si_texture *sdst, struct si_texture *ssrc)
226 {
227    struct radeon_info *info = &sctx->screen->info;
228    unsigned bpp = sdst->surface.bpe;
229    uint64_t dst_address = sdst->buffer.gpu_address + sdst->surface.u.legacy.level[0].offset_256B * 256;
230    uint64_t src_address = ssrc->buffer.gpu_address + ssrc->surface.u.legacy.level[0].offset_256B * 256;
231    unsigned dst_mode = sdst->surface.u.legacy.level[0].mode;
232    unsigned src_mode = ssrc->surface.u.legacy.level[0].mode;
233    unsigned dst_tile_index = sdst->surface.u.legacy.tiling_index[0];
234    unsigned src_tile_index = ssrc->surface.u.legacy.tiling_index[0];
235    unsigned dst_tile_mode = info->si_tile_mode_array[dst_tile_index];
236    unsigned src_tile_mode = info->si_tile_mode_array[src_tile_index];
237    unsigned dst_micro_mode = G_009910_MICRO_TILE_MODE_NEW(dst_tile_mode);
238    unsigned src_micro_mode = G_009910_MICRO_TILE_MODE_NEW(src_tile_mode);
239    unsigned dst_tile_swizzle = dst_mode == RADEON_SURF_MODE_2D ? sdst->surface.tile_swizzle : 0;
240    unsigned src_tile_swizzle = src_mode == RADEON_SURF_MODE_2D ? ssrc->surface.tile_swizzle : 0;
241    unsigned dst_pitch = sdst->surface.u.legacy.level[0].nblk_x;
242    unsigned src_pitch = ssrc->surface.u.legacy.level[0].nblk_x;
243    uint64_t dst_slice_pitch =
244       ((uint64_t)sdst->surface.u.legacy.level[0].slice_size_dw * 4) / bpp;
245    uint64_t src_slice_pitch =
246       ((uint64_t)ssrc->surface.u.legacy.level[0].slice_size_dw * 4) / bpp;
247    unsigned dst_width = minify_as_blocks(sdst->buffer.b.b.width0, 0, sdst->surface.blk_w);
248    unsigned src_width = minify_as_blocks(ssrc->buffer.b.b.width0, 0, ssrc->surface.blk_w);
249    unsigned copy_width = DIV_ROUND_UP(ssrc->buffer.b.b.width0, ssrc->surface.blk_w);
250    unsigned copy_height = DIV_ROUND_UP(ssrc->buffer.b.b.height0, ssrc->surface.blk_h);
251 
252    dst_address |= dst_tile_swizzle << 8;
253    src_address |= src_tile_swizzle << 8;
254 
255    /* Linear -> linear sub-window copy. */
256    if (dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED && src_mode == RADEON_SURF_MODE_LINEAR_ALIGNED &&
257        /* check if everything fits into the bitfields */
258        src_pitch <= (1 << 14) && dst_pitch <= (1 << 14) && src_slice_pitch <= (1 << 28) &&
259        dst_slice_pitch <= (1 << 28) && copy_width <= (1 << 14) && copy_height <= (1 << 14) &&
260        /* HW limitation - GFX7: */
261        (sctx->chip_class != GFX7 ||
262         (copy_width < (1 << 14) && copy_height < (1 << 14))) &&
263        /* HW limitation - some GFX7 parts: */
264        ((sctx->family != CHIP_BONAIRE && sctx->family != CHIP_KAVERI) ||
265         (copy_width != (1 << 14) && copy_height != (1 << 14)))) {
266       struct radeon_cmdbuf *cs = sctx->sdma_cs;
267 
268       radeon_begin(cs);
269       radeon_emit(CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW, 0) |
270                   (util_logbase2(bpp) << 29));
271       radeon_emit(src_address);
272       radeon_emit(src_address >> 32);
273       radeon_emit(0);
274       radeon_emit((src_pitch - 1) << 16);
275       radeon_emit(src_slice_pitch - 1);
276       radeon_emit(dst_address);
277       radeon_emit(dst_address >> 32);
278       radeon_emit(0);
279       radeon_emit((dst_pitch - 1) << 16);
280       radeon_emit(dst_slice_pitch - 1);
281       if (sctx->chip_class == GFX7) {
282          radeon_emit(copy_width | (copy_height << 16));
283          radeon_emit(0);
284       } else {
285          radeon_emit((copy_width - 1) | ((copy_height - 1) << 16));
286          radeon_emit(0);
287       }
288       radeon_end();
289       return true;
290    }
291 
292    /* Tiled <-> linear sub-window copy. */
293    if ((src_mode >= RADEON_SURF_MODE_1D) != (dst_mode >= RADEON_SURF_MODE_1D)) {
294       struct si_texture *tiled = src_mode >= RADEON_SURF_MODE_1D ? ssrc : sdst;
295       struct si_texture *linear = tiled == ssrc ? sdst : ssrc;
296       unsigned tiled_width = tiled == ssrc ? src_width : dst_width;
297       unsigned linear_width = linear == ssrc ? src_width : dst_width;
298       unsigned tiled_pitch = tiled == ssrc ? src_pitch : dst_pitch;
299       unsigned linear_pitch = linear == ssrc ? src_pitch : dst_pitch;
300       unsigned tiled_slice_pitch = tiled == ssrc ? src_slice_pitch : dst_slice_pitch;
301       unsigned linear_slice_pitch = linear == ssrc ? src_slice_pitch : dst_slice_pitch;
302       uint64_t tiled_address = tiled == ssrc ? src_address : dst_address;
303       uint64_t linear_address = linear == ssrc ? src_address : dst_address;
304       unsigned tiled_micro_mode = tiled == ssrc ? src_micro_mode : dst_micro_mode;
305 
306       assert(tiled_pitch % 8 == 0);
307       assert(tiled_slice_pitch % 64 == 0);
308       unsigned pitch_tile_max = tiled_pitch / 8 - 1;
309       unsigned slice_tile_max = tiled_slice_pitch / 64 - 1;
310       unsigned xalign = MAX2(1, 4 / bpp);
311       unsigned copy_width_aligned = copy_width;
312 
313       /* If the region ends at the last pixel and is unaligned, we
314        * can copy the remainder of the line that is not visible to
315        * make it aligned.
316        */
317       if (copy_width % xalign != 0 && 0 + copy_width == linear_width &&
318           copy_width == tiled_width &&
319           align(copy_width, xalign) <= linear_pitch &&
320           align(copy_width, xalign) <= tiled_pitch)
321          copy_width_aligned = align(copy_width, xalign);
322 
323       /* HW limitations. */
324       if ((sctx->family == CHIP_BONAIRE || sctx->family == CHIP_KAVERI) &&
325           linear_pitch - 1 == 0x3fff && bpp == 16)
326          return false;
327 
328       if ((sctx->family == CHIP_BONAIRE || sctx->family == CHIP_KAVERI ||
329            sctx->family == CHIP_KABINI) &&
330           (copy_width == (1 << 14) || copy_height == (1 << 14)))
331          return false;
332 
333       /* The hw can read outside of the given linear buffer bounds,
334        * or access those pages but not touch the memory in case
335        * of writes. (it still causes a VM fault)
336        *
337        * Out-of-bounds memory access or page directory access must
338        * be prevented.
339        */
340       int64_t start_linear_address, end_linear_address;
341       unsigned granularity;
342 
343       /* Deduce the size of reads from the linear surface. */
344       switch (tiled_micro_mode) {
345       case V_009910_ADDR_SURF_DISPLAY_MICRO_TILING:
346          granularity = bpp == 1 ? 64 / (8 * bpp) : 128 / (8 * bpp);
347          break;
348       case V_009910_ADDR_SURF_THIN_MICRO_TILING:
349       case V_009910_ADDR_SURF_DEPTH_MICRO_TILING:
350          if (0 /* TODO: THICK microtiling */)
351             granularity =
352                bpp == 1 ? 32 / (8 * bpp)
353                         : bpp == 2 ? 64 / (8 * bpp) : bpp <= 8 ? 128 / (8 * bpp) : 256 / (8 * bpp);
354          else
355             granularity = bpp <= 2 ? 64 / (8 * bpp) : bpp <= 8 ? 128 / (8 * bpp) : 256 / (8 * bpp);
356          break;
357       default:
358          return false;
359       }
360 
361       /* The linear reads start at tiled_x & ~(granularity - 1).
362        * If linear_x == 0 && tiled_x % granularity != 0, the hw
363        * starts reading from an address preceding linear_address!!!
364        */
365       start_linear_address =
366          linear->surface.u.legacy.level[0].offset_256B * 256;
367 
368       end_linear_address =
369          linear->surface.u.legacy.level[0].offset_256B * 256 +
370          bpp * ((copy_height - 1) * linear_pitch + copy_width);
371 
372       if ((0 + copy_width) % granularity)
373          end_linear_address += granularity - (0 + copy_width) % granularity;
374 
375       if (start_linear_address < 0 || end_linear_address > linear->surface.surf_size)
376          return false;
377 
378       /* Check requirements. */
379       if (tiled_address % 256 == 0 && linear_address % 4 == 0 && linear_pitch % xalign == 0 &&
380           copy_width_aligned % xalign == 0 &&
381           tiled_micro_mode != V_009910_ADDR_SURF_ROTATED_MICRO_TILING &&
382           /* check if everything fits into the bitfields */
383           tiled->surface.u.legacy.tile_split <= 4096 && pitch_tile_max < (1 << 11) &&
384           slice_tile_max < (1 << 22) && linear_pitch <= (1 << 14) &&
385           linear_slice_pitch <= (1 << 28) && copy_width_aligned <= (1 << 14) &&
386           copy_height <= (1 << 14)) {
387          struct radeon_cmdbuf *cs = sctx->sdma_cs;
388          uint32_t direction = linear == sdst ? 1u << 31 : 0;
389 
390          radeon_begin(cs);
391          radeon_emit(CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
392                                      CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW, 0) |
393                      direction);
394          radeon_emit(tiled_address);
395          radeon_emit(tiled_address >> 32);
396          radeon_emit(0);
397          radeon_emit(pitch_tile_max << 16);
398          radeon_emit(slice_tile_max);
399          radeon_emit(encode_legacy_tile_info(sctx, tiled));
400          radeon_emit(linear_address);
401          radeon_emit(linear_address >> 32);
402          radeon_emit(0);
403          radeon_emit(((linear_pitch - 1) << 16));
404          radeon_emit(linear_slice_pitch - 1);
405          if (sctx->chip_class == GFX7) {
406             radeon_emit(copy_width_aligned | (copy_height << 16));
407             radeon_emit(1);
408          } else {
409             radeon_emit((copy_width_aligned - 1) | ((copy_height - 1) << 16));
410             radeon_emit(0);
411          }
412          radeon_end();
413          return true;
414       }
415    }
416 
417    return false;
418 }
419 
si_sdma_copy_image(struct si_context * sctx,struct si_texture * dst,struct si_texture * src)420 bool si_sdma_copy_image(struct si_context *sctx, struct si_texture *dst, struct si_texture *src)
421 {
422    struct radeon_winsys *ws = sctx->ws;
423 
424    if (!sctx->sdma_cs) {
425       if (sctx->screen->debug_flags & DBG(NO_DMA) || sctx->chip_class < GFX7)
426          return false;
427 
428       sctx->sdma_cs = CALLOC_STRUCT(radeon_cmdbuf);
429       if (ws->cs_create(sctx->sdma_cs, sctx->ctx, RING_DMA,
430                         NULL, NULL, true))
431          return false;
432    }
433 
434    if (!si_prepare_for_sdma_copy(sctx, dst, src))
435       return false;
436 
437    /* Decompress DCC on older chips */
438    if (vi_dcc_enabled(src, 0) && sctx->chip_class < GFX10)
439       si_decompress_dcc(sctx, src);
440    /* TODO: DCC compression is possible on GFX10+. See si_set_mutable_tex_desc_fields for
441     * additional constraints.
442     * For now, the only use-case of SDMA is DRI_PRIME tiled->linear copy, so this is not
443     * implemented. */
444    if (vi_dcc_enabled(dst, 0))
445       return false;
446 
447    /* Always flush the gfx queue to get the winsys to handle the dependencies for us. */
448    si_flush_gfx_cs(sctx, 0, NULL);
449 
450    switch (sctx->chip_class) {
451       case GFX7:
452       case GFX8:
453          if (!cik_sdma_copy_texture(sctx, dst, src))
454             return false;
455          break;
456       case GFX9:
457       case GFX10:
458       case GFX10_3:
459          if (!si_sdma_v4_v5_copy_texture(sctx, dst, src, sctx->chip_class >= GFX10))
460             return false;
461          break;
462       default:
463          return false;
464    }
465 
466    radeon_add_to_buffer_list(sctx, sctx->sdma_cs, &src->buffer, RADEON_USAGE_READ,
467                              RADEON_PRIO_SAMPLER_TEXTURE);
468    radeon_add_to_buffer_list(sctx, sctx->sdma_cs, &dst->buffer, RADEON_USAGE_WRITE,
469                              RADEON_PRIO_SAMPLER_TEXTURE);
470 
471    unsigned flags = RADEON_FLUSH_START_NEXT_GFX_IB_NOW;
472    if (unlikely(radeon_uses_secure_bos(sctx->ws))) {
473       if ((bool) (src->buffer.flags & RADEON_FLAG_ENCRYPTED) !=
474           sctx->ws->cs_is_secure(sctx->sdma_cs)) {
475          flags = RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION;
476       }
477    }
478 
479    return ws->cs_flush(sctx->sdma_cs, flags, NULL) == 0;
480 }
481