• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25  * IN THE SOFTWARE.
26  */
27 
28 #include "util/u_atomic.h"
29 #include "util/u_debug.h"
30 #include "vulkan/util/vk_format.h"
31 #include "ac_drm_fourcc.h"
32 #include "radv_debug.h"
33 #include "radv_private.h"
34 #include "radv_radeon_winsys.h"
35 #include "sid.h"
36 #include "vk_format.h"
37 #include "vk_render_pass.h"
38 #include "vk_util.h"
39 
40 #include "gfx10_format_table.h"
41 
42 static unsigned
radv_choose_tiling(struct radv_device * device,const VkImageCreateInfo * pCreateInfo,VkFormat format)43 radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, VkFormat format)
44 {
45    if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
46       assert(pCreateInfo->samples <= 1);
47       return RADEON_SURF_MODE_LINEAR_ALIGNED;
48    }
49 
50    if (pCreateInfo->usage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR))
51       return RADEON_SURF_MODE_LINEAR_ALIGNED;
52 
53    /* MSAA resources must be 2D tiled. */
54    if (pCreateInfo->samples > 1)
55       return RADEON_SURF_MODE_2D;
56 
57    if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) &&
58        device->physical_device->rad_info.gfx_level <= GFX8) {
59       /* this causes hangs in some VK CTS tests on GFX9. */
60       /* Textures with a very small height are recommended to be linear. */
61       if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
62           /* Only very thin and long 2D textures should benefit from
63            * linear_aligned. */
64           (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
65          return RADEON_SURF_MODE_LINEAR_ALIGNED;
66    }
67 
68    return RADEON_SURF_MODE_2D;
69 }
70 
/* Returns whether this depth/stencil image can use TC-compatible HTILE,
 * i.e. HTILE metadata laid out so the texture units can read it directly
 * without a depth decompression pass first.
 */
static bool
radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, VkFormat format)
{
   /* TC-compat HTILE is only available for GFX8+. */
   if (device->physical_device->rad_info.gfx_level < GFX8)
      return false;

   /* TC-compat HTILE looks broken on Tonga (and Iceland is the same design) and the documented bug
    * workarounds don't help.
    */
   if (device->physical_device->rad_info.family == CHIP_TONGA ||
       device->physical_device->rad_info.family == CHIP_ICELAND)
      return false;

   /* Not used with linear tiling. */
   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* Do not enable TC-compatible HTILE if the image isn't readable by a
    * shader because no texture fetches will happen.
    */
   if (!(pCreateInfo->usage &
         (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
      return false;

   if (device->physical_device->rad_info.gfx_level < GFX9) {
      /* TC-compat HTILE for MSAA depth/stencil images is broken
       * on GFX8 because the tiling doesn't match.
       */
      if (pCreateInfo->samples >= 2 && format == VK_FORMAT_D32_SFLOAT_S8_UINT)
         return false;

      /* GFX9+ supports compression for both 32-bit and 16-bit depth
       * surfaces, while GFX8 only supports 32-bit natively. Though,
       * the driver allows TC-compat HTILE for 16-bit depth surfaces
       * with no Z planes compression.
       */
      if (format != VK_FORMAT_D32_SFLOAT_S8_UINT && format != VK_FORMAT_D32_SFLOAT && format != VK_FORMAT_D16_UNORM)
         return false;

      /* TC-compat HTILE for layered images can have interleaved slices (see sliceInterleaved flag
       * in addrlib).  radv_clear_htile does not work.
       */
      if (pCreateInfo->arrayLayers > 1)
         return false;
   }

   /* GFX9 has issues when the sample count is 4 and the format is D16 */
   if (device->physical_device->rad_info.gfx_level == GFX9 && pCreateInfo->samples == 4 &&
       format == VK_FORMAT_D16_UNORM)
      return false;

   return true;
}
124 
125 static bool
radv_surface_has_scanout(struct radv_device * device,const struct radv_image_create_info * info)126 radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
127 {
128    if (info->bo_metadata) {
129       if (device->physical_device->rad_info.gfx_level >= GFX9)
130          return info->bo_metadata->u.gfx9.scanout;
131       else
132          return info->bo_metadata->u.legacy.scanout;
133    }
134 
135    return info->scanout;
136 }
137 
138 static bool
radv_image_use_fast_clear_for_image_early(const struct radv_device * device,const struct radv_image * image)139 radv_image_use_fast_clear_for_image_early(const struct radv_device *device, const struct radv_image *image)
140 {
141    if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
142       return true;
143 
144    if (image->vk.samples <= 1 && image->vk.extent.width * image->vk.extent.height <= 512 * 512) {
145       /* Do not enable CMASK or DCC for small surfaces where the cost
146        * of the eliminate pass can be higher than the benefit of fast
147        * clear. RadeonSI does this, but the image threshold is
148        * different.
149        */
150       return false;
151    }
152 
153    return !!(image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
154 }
155 
156 static bool
radv_image_use_fast_clear_for_image(const struct radv_device * device,const struct radv_image * image)157 radv_image_use_fast_clear_for_image(const struct radv_device *device, const struct radv_image *image)
158 {
159    if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
160       return true;
161 
162    return radv_image_use_fast_clear_for_image_early(device, image) && (image->exclusive ||
163                                                                        /* Enable DCC for concurrent images if stores are
164                                                                         * supported because that means we can keep DCC
165                                                                         * compressed on all layouts/queues.
166                                                                         */
167                                                                        radv_image_use_dcc_image_stores(device, image));
168 }
169 
/* Returns whether DCC can be kept for an image of the given format, taking
 * into account every format it may be viewed as (VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT
 * plus an optional VkImageFormatListCreateInfo chained in pNext).
 *
 * *sign_reinterpret is initialized to false and may be set by
 * radv_dcc_formats_compatible() — presumably when a compatible view pair
 * differs only in signedness; see that helper for the exact semantics.
 */
bool
radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext, VkFormat format,
                                VkImageCreateFlags flags, bool *sign_reinterpret)
{
   bool blendable;

   /* DCC requires the base format to be supported as a color buffer. */
   if (!radv_is_colorbuffer_format_supported(pdev, format, &blendable))
      return false;

   if (sign_reinterpret != NULL)
      *sign_reinterpret = false;

   /* All formats are compatible on GFX11. */
   if ((flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) && pdev->rad_info.gfx_level < GFX11) {
      const struct VkImageFormatListCreateInfo *format_list =
         (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(pNext, IMAGE_FORMAT_LIST_CREATE_INFO);

      /* We have to ignore the existence of the list if viewFormatCount = 0 */
      if (format_list && format_list->viewFormatCount) {
         /* compatibility is transitive, so we only need to check
          * one format with everything else. */
         for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
            if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
               continue;

            if (!radv_dcc_formats_compatible(pdev->rad_info.gfx_level, format, format_list->pViewFormats[i],
                                             sign_reinterpret))
               return false;
         }
      } else {
         /* A mutable image without a usable format list could be viewed as
          * anything, so DCC compatibility cannot be proven.
          */
         return false;
      }
   }

   return true;
}
206 
207 static bool
radv_format_is_atomic_allowed(struct radv_device * device,VkFormat format)208 radv_format_is_atomic_allowed(struct radv_device *device, VkFormat format)
209 {
210    if (format == VK_FORMAT_R32_SFLOAT && !radv_uses_image_float32_atomics(device))
211       return false;
212 
213    return radv_is_atomic_format_supported(format);
214 }
215 
216 static bool
radv_formats_is_atomic_allowed(struct radv_device * device,const void * pNext,VkFormat format,VkImageCreateFlags flags)217 radv_formats_is_atomic_allowed(struct radv_device *device, const void *pNext, VkFormat format, VkImageCreateFlags flags)
218 {
219    if (radv_format_is_atomic_allowed(device, format))
220       return true;
221 
222    if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
223       const struct VkImageFormatListCreateInfo *format_list =
224          (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(pNext, IMAGE_FORMAT_LIST_CREATE_INFO);
225 
226       /* We have to ignore the existence of the list if viewFormatCount = 0 */
227       if (format_list && format_list->viewFormatCount) {
228          for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
229             if (radv_format_is_atomic_allowed(device, format_list->pViewFormats[i]))
230                return true;
231          }
232       }
233    }
234 
235    return false;
236 }
237 
/* Early (pre surface computation) decision on whether DCC should be attempted
 * for this image; radv_use_dcc_for_image_late() can still disable it once the
 * surface layout is known.
 */
static bool
radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *image, const VkImageCreateInfo *pCreateInfo,
                             VkFormat format, bool *sign_reinterpret)
{
   /* DCC (Delta Color Compression) is only available for GFX8+. */
   if (device->physical_device->rad_info.gfx_level < GFX8)
      return false;

   /* VK_EXT_image_compression_control lets the app explicitly opt out. */
   const VkImageCompressionControlEXT *compression =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_COMPRESSION_CONTROL_EXT);

   if (device->instance->debug_flags & RADV_DEBUG_NO_DCC ||
       (compression && compression->flags == VK_IMAGE_COMPRESSION_DISABLED_EXT)) {
      return false;
   }

   /* Shareable images only keep DCC when the layout is negotiated via DRM
    * format modifiers.
    */
   if (image->shareable && image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return false;

   /*
    * TODO: Enable DCC for storage images on GFX9 and earlier.
    *
    * Also disable DCC with atomics because even when DCC stores are
    * supported atomics will always decompress. So if we are
    * decompressing a lot anyway we might as well not have DCC.
    */
   if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
       (device->physical_device->rad_info.gfx_level < GFX10 ||
        radv_formats_is_atomic_allowed(device, pCreateInfo->pNext, format, pCreateInfo->flags)))
      return false;

   /* No DCC with linear tiling. */
   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* No DCC for subsampled or multi-plane formats. */
   if (vk_format_is_subsampled(format) || vk_format_get_plane_count(format) > 1)
      return false;

   if (!radv_image_use_fast_clear_for_image_early(device, image) &&
       image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return false;

   /* Do not enable DCC for mipmapped arrays because performance is worse. */
   if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
      return false;

   if (device->physical_device->rad_info.gfx_level < GFX10) {
      /* TODO: Add support for DCC MSAA on GFX8-9. */
      if (pCreateInfo->samples > 1 && !device->physical_device->dcc_msaa_allowed)
         return false;

      /* TODO: Add support for DCC layers/mipmaps on GFX9. */
      if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
          device->physical_device->rad_info.gfx_level == GFX9)
         return false;
   }

   /* DCC MSAA can't work on GFX10.3 and earlier without FMASK. */
   if (pCreateInfo->samples > 1 && device->physical_device->rad_info.gfx_level < GFX11 &&
       (device->instance->debug_flags & RADV_DEBUG_NO_FMASK))
      return false;

   /* Finally, the format (and all mutable view formats) must be DCC-compatible. */
   return radv_are_formats_dcc_compatible(device->physical_device, pCreateInfo->pNext, format, pCreateInfo->flags,
                                          sign_reinterpret);
}
302 
303 static bool
radv_use_dcc_for_image_late(struct radv_device * device,struct radv_image * image)304 radv_use_dcc_for_image_late(struct radv_device *device, struct radv_image *image)
305 {
306    if (!radv_image_has_dcc(image))
307       return false;
308 
309    if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
310       return true;
311 
312    if (!radv_image_use_fast_clear_for_image(device, image))
313       return false;
314 
315    /* TODO: Fix storage images with DCC without DCC image stores.
316     * Disabling it for now. */
317    if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) && !radv_image_use_dcc_image_stores(device, image))
318       return false;
319 
320    return true;
321 }
322 
323 /*
324  * Whether to enable image stores with DCC compression for this image. If
325  * this function returns false the image subresource should be decompressed
326  * before using it with image stores.
327  *
328  * Note that this can have mixed performance implications, see
329  * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6796#note_643299
330  *
331  * This function assumes the image uses DCC compression.
332  */
333 bool
radv_image_use_dcc_image_stores(const struct radv_device * device,const struct radv_image * image)334 radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image)
335 {
336    return ac_surface_supports_dcc_image_stores(device->physical_device->rad_info.gfx_level, &image->planes[0].surface);
337 }
338 
/*
 * Whether to use a predicate to determine whether DCC is in a compressed
 * state. This can be used to avoid decompressing an image multiple times.
 */
bool
radv_image_use_dcc_predication(const struct radv_device *device, const struct radv_image *image)
{
   if (!radv_image_has_dcc(image))
      return false;

   /* With DCC image stores the image can stay compressed, so no predicate
    * is needed.
    */
   return !radv_image_use_dcc_image_stores(device, image);
}
348 
349 static inline bool
radv_use_fmask_for_image(const struct radv_device * device,const struct radv_image * image)350 radv_use_fmask_for_image(const struct radv_device *device, const struct radv_image *image)
351 {
352    if (device->physical_device->rad_info.gfx_level == GFX9 && image->vk.array_layers > 1) {
353       /* On GFX9, FMASK can be interleaved with layers and this isn't properly supported. */
354       return false;
355    }
356 
357    return device->physical_device->use_fmask && image->vk.samples > 1 &&
358           ((image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
359            (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
360 }
361 
362 static inline bool
radv_use_htile_for_image(const struct radv_device * device,const struct radv_image * image,const VkImageCreateInfo * pCreateInfo)363 radv_use_htile_for_image(const struct radv_device *device, const struct radv_image *image,
364                          const VkImageCreateInfo *pCreateInfo)
365 {
366    const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
367 
368    const VkImageCompressionControlEXT *compression =
369       vk_find_struct_const(pCreateInfo->pNext, IMAGE_COMPRESSION_CONTROL_EXT);
370 
371    if (device->instance->debug_flags & RADV_DEBUG_NO_HIZ ||
372        (compression && compression->flags == VK_IMAGE_COMPRESSION_DISABLED_EXT))
373       return false;
374 
375    /* TODO:
376     * - Investigate about mips+layers.
377     * - Enable on other gens.
378     */
379    bool use_htile_for_mips = image->vk.array_layers == 1 && device->physical_device->rad_info.gfx_level >= GFX10;
380 
381    /* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */
382    if (device->physical_device->rad_info.gfx_level == GFX10 && image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT &&
383        image->vk.mip_levels > 1)
384       return false;
385 
386    /* Do not enable HTILE for very small images because it seems less performant but make sure it's
387     * allowed with VRS attachments because we need HTILE on GFX10.3.
388     */
389    if (image->vk.extent.width * image->vk.extent.height < 8 * 8 &&
390        !(device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS) &&
391        !(gfx_level == GFX10_3 && device->vk.enabled_features.attachmentFragmentShadingRate))
392       return false;
393 
394    return (image->vk.mip_levels == 1 || use_htile_for_mips) && !image->shareable;
395 }
396 
397 static bool
radv_use_tc_compat_cmask_for_image(struct radv_device * device,struct radv_image * image)398 radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image *image)
399 {
400    /* TC-compat CMASK is only available for GFX8+. */
401    if (device->physical_device->rad_info.gfx_level < GFX8)
402       return false;
403 
404    /* GFX9 has issues when sample count is greater than 2 */
405    if (device->physical_device->rad_info.gfx_level == GFX9 && image->vk.samples > 2)
406       return false;
407 
408    if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK)
409       return false;
410 
411    /* TC-compat CMASK with storage images is supported on GFX10+. */
412    if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) && device->physical_device->rad_info.gfx_level < GFX10)
413       return false;
414 
415    /* Do not enable TC-compatible if the image isn't readable by a shader
416     * because no texture fetches will happen.
417     */
418    if (!(image->vk.usage &
419          (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
420       return false;
421 
422    /* If the image doesn't have FMASK, it can't be fetchable. */
423    if (!radv_image_has_fmask(image))
424       return false;
425 
426    return true;
427 }
428 
429 static uint32_t
radv_get_bo_metadata_word1(const struct radv_device * device)430 radv_get_bo_metadata_word1(const struct radv_device *device)
431 {
432    return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
433 }
434 
435 static bool
radv_is_valid_opaque_metadata(const struct radv_device * device,const struct radeon_bo_metadata * md)436 radv_is_valid_opaque_metadata(const struct radv_device *device, const struct radeon_bo_metadata *md)
437 {
438    if (md->metadata[0] != 1 || md->metadata[1] != radv_get_bo_metadata_word1(device))
439       return false;
440 
441    if (md->size_metadata < 40)
442       return false;
443 
444    return true;
445 }
446 
/* Overwrites the tiling-related fields of a surface with the layout recorded
 * in imported BO metadata, so the surface matches how the exporter laid the
 * image out.
 */
static void
radv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf *surface,
                                 const struct radeon_bo_metadata *md)
{
   /* Clear the current mode bits before setting the imported mode. */
   surface->flags = RADEON_SURF_CLR(surface->flags, MODE);

   if (device->physical_device->rad_info.gfx_level >= GFX9) {
      /* On GFX9+ a single swizzle mode describes the layout; 0 is treated
       * as linear here.
       */
      if (md->u.gfx9.swizzle_mode > 0)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

      surface->u.gfx9.swizzle_mode = md->u.gfx9.swizzle_mode;
   } else {
      /* Legacy (pre-GFX9) tiling is described by individual parameters. */
      surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
      surface->u.legacy.bankw = md->u.legacy.bankw;
      surface->u.legacy.bankh = md->u.legacy.bankh;
      surface->u.legacy.tile_split = md->u.legacy.tile_split;
      surface->u.legacy.mtilea = md->u.legacy.mtilea;
      surface->u.legacy.num_banks = md->u.legacy.num_banks;

      /* Macro tiling takes precedence over micro tiling. */
      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
   }
}
476 
/* Reconciles the driver-computed image dimensions with the dimensions
 * recorded in imported BO metadata (if any), rejecting imports that cannot
 * be handled safely.
 */
static VkResult
radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image,
                            const struct radv_image_create_info *create_info, struct ac_surf_info *image_info)
{
   unsigned width = image->vk.extent.width;
   unsigned height = image->vk.extent.height;

   /*
    * minigbm sometimes allocates bigger images which is going to result in
    * weird strides and other properties. Lets be lenient where possible and
    * fail it on GFX10 (as we cannot cope there).
    *
    * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
    */
   if (create_info->bo_metadata && radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
      const struct radeon_bo_metadata *md = create_info->bo_metadata;

      /* Decode the external dimensions; the hardware fields are stored minus
       * one, hence the "+ 1". NOTE(review): assumes metadata[3]/[4] hold the
       * descriptor words matching these G_* field layouts — confirm against
       * the exporter.
       */
      if (device->physical_device->rad_info.gfx_level >= GFX10) {
         width = G_00A004_WIDTH_LO(md->metadata[3]) + (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
         height = G_00A008_HEIGHT(md->metadata[4]) + 1;
      } else {
         width = G_008F18_WIDTH(md->metadata[4]) + 1;
         height = G_008F18_HEIGHT(md->metadata[4]) + 1;
      }
   }

   /* Internal and external dimensions agree: nothing to patch. */
   if (image->vk.extent.width == width && image->vk.extent.height == height)
      return VK_SUCCESS;

   if (width < image->vk.extent.width || height < image->vk.extent.height) {
      /* A smaller external image can never back the internal one. */
      fprintf(stderr,
              "The imported image has smaller dimensions than the internal\n"
              "dimensions. Using it is going to fail badly, so we reject\n"
              "this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->vk.extent.width, image->vk.extent.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
      fprintf(stderr,
              "Tried to import an image with inconsistent width on GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency in width and will fail this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->vk.extent.width, image->vk.extent.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else {
      /* Pre-GFX10: warn but accept, and use the larger external dimensions. */
      fprintf(stderr,
              "Tried to import an image with inconsistent width on pre-GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency and would fail on GFX10.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->vk.extent.width, image->vk.extent.height, width, height);
   }
   image_info->width = width;
   image_info->height = height;

   return VK_SUCCESS;
}
535 
536 static VkResult
radv_patch_image_from_extra_info(struct radv_device * device,struct radv_image * image,const struct radv_image_create_info * create_info,struct ac_surf_info * image_info)537 radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *image,
538                                  const struct radv_image_create_info *create_info, struct ac_surf_info *image_info)
539 {
540    VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
541    if (result != VK_SUCCESS)
542       return result;
543 
544    for (unsigned plane = 0; plane < image->plane_count; ++plane) {
545       if (create_info->bo_metadata) {
546          radv_patch_surface_from_metadata(device, &image->planes[plane].surface, create_info->bo_metadata);
547       }
548 
549       if (radv_surface_has_scanout(device, create_info)) {
550          image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
551          if (device->instance->debug_flags & RADV_DEBUG_NO_DISPLAY_DCC)
552             image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
553 
554          image_info->surf_index = NULL;
555       }
556 
557       if (create_info->prime_blit_src && !device->physical_device->rad_info.sdma_supports_compression) {
558          /* Older SDMA hw can't handle DCC */
559          image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
560       }
561    }
562    return VK_SUCCESS;
563 }
564 
565 static VkFormat
radv_image_get_plane_format(const struct radv_physical_device * pdev,const struct radv_image * image,unsigned plane)566 radv_image_get_plane_format(const struct radv_physical_device *pdev, const struct radv_image *image, unsigned plane)
567 {
568    if (radv_is_format_emulated(pdev, image->vk.format)) {
569       if (plane == 0)
570          return image->vk.format;
571       if (vk_format_description(image->vk.format)->layout == UTIL_FORMAT_LAYOUT_ASTC)
572          return vk_texcompress_astc_emulation_format(image->vk.format);
573       else
574          return vk_texcompress_etc2_emulation_format(image->vk.format);
575    }
576 
577    return vk_format_get_plane_format(image->vk.format, plane);
578 }
579 
/* Builds the RADEON_SURF_* flag word for one plane of the image: tiling mode,
 * surface type, depth/stencil handling, and which metadata surfaces
 * (DCC/HTILE/FMASK) must be disabled.
 */
static uint64_t
radv_get_surface_flags(struct radv_device *device, struct radv_image *image, unsigned plane_id,
                       const VkImageCreateInfo *pCreateInfo, VkFormat image_format)
{
   uint64_t flags;
   unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
   VkFormat format = radv_image_get_plane_format(device->physical_device, image, plane_id);
   const struct util_format_description *desc = vk_format_description(format);
   bool is_depth, is_stencil;

   is_depth = util_format_has_depth(desc);
   is_stencil = util_format_has_stencil(desc);

   flags = RADEON_SURF_SET(array_mode, MODE);

   /* Map the Vulkan image type (plus array-ness) to the surface type. */
   switch (pCreateInfo->imageType) {
   case VK_IMAGE_TYPE_1D:
      if (pCreateInfo->arrayLayers > 1)
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
      else
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
      break;
   case VK_IMAGE_TYPE_2D:
      if (pCreateInfo->arrayLayers > 1)
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
      else
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
      break;
   case VK_IMAGE_TYPE_3D:
      flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
      break;
   default:
      unreachable("unhandled image type");
   }

   /* Required for clearing/initializing a specific layer on GFX8. */
   flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;

   if (is_depth) {
      flags |= RADEON_SURF_ZBUFFER;

      if (is_depth && is_stencil && device->physical_device->rad_info.gfx_level <= GFX8) {
         if (!(pCreateInfo->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT))
            flags |= RADEON_SURF_NO_RENDER_TARGET;

         /* RADV doesn't support stencil pitch adjustment. As a result there are some spec gaps that
          * are not covered by CTS.
          *
          * For D+S images with pitch constraints due to rendertarget usage it can happen that
          * sampling from mipmaps beyond the base level of the descriptor is broken as the pitch
          * adjustment can't be applied to anything beyond the first level.
          */
         flags |= RADEON_SURF_NO_STENCIL_ADJUST;
      }

      /* TC-compat HTILE only matters when HTILE is enabled at all. */
      if (radv_use_htile_for_image(device, image, pCreateInfo) && !(flags & RADEON_SURF_NO_RENDER_TARGET)) {
         if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
            flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
      } else {
         flags |= RADEON_SURF_NO_HTILE;
      }
   }

   if (is_stencil)
      flags |= RADEON_SURF_SBUFFER;

   if (device->physical_device->rad_info.gfx_level >= GFX9 && pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
       vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format))
      flags |= RADEON_SURF_NO_RENDER_TARGET;

   if (!radv_use_dcc_for_image_early(device, image, pCreateInfo, image_format, &image->dcc_sign_reinterpret))
      flags |= RADEON_SURF_DISABLE_DCC;

   if (!radv_use_fmask_for_image(device, image))
      flags |= RADEON_SURF_NO_FMASK;

   /* Sparse residency: use the PRT layout and disable all metadata surfaces. */
   if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
      flags |= RADEON_SURF_PRT | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE | RADEON_SURF_DISABLE_DCC;
   }

   /* Images touched by the transfer queue must stay uncompressed when the
    * SDMA engine can't handle compression.
    */
   if (image->queue_family_mask & BITFIELD_BIT(RADV_QUEUE_TRANSFER)) {
      if (!device->physical_device->rad_info.sdma_supports_compression)
         flags |= RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_HTILE;
   }

   /* Disable DCC for VRS rate images because the hw can't handle compression. */
   if (pCreateInfo->usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)
      flags |= RADEON_SURF_VRS_RATE | RADEON_SURF_DISABLE_DCC;
   if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT)))
      flags |= RADEON_SURF_NO_TEXTURE;

   return flags;
}
673 
674 unsigned
radv_map_swizzle(unsigned swizzle)675 radv_map_swizzle(unsigned swizzle)
676 {
677    switch (swizzle) {
678    case PIPE_SWIZZLE_Y:
679       return V_008F0C_SQ_SEL_Y;
680    case PIPE_SWIZZLE_Z:
681       return V_008F0C_SQ_SEL_Z;
682    case PIPE_SWIZZLE_W:
683       return V_008F0C_SQ_SEL_W;
684    case PIPE_SWIZZLE_0:
685       return V_008F0C_SQ_SEL_0;
686    case PIPE_SWIZZLE_1:
687       return V_008F0C_SQ_SEL_1;
688    default: /* PIPE_SWIZZLE_X */
689       return V_008F0C_SQ_SEL_X;
690    }
691 }
692 
693 void
radv_compose_swizzle(const struct util_format_description * desc,const VkComponentMapping * mapping,enum pipe_swizzle swizzle[4])694 radv_compose_swizzle(const struct util_format_description *desc, const VkComponentMapping *mapping,
695                      enum pipe_swizzle swizzle[4])
696 {
697    if (desc->format == PIPE_FORMAT_R64_UINT || desc->format == PIPE_FORMAT_R64_SINT) {
698       /* 64-bit formats only support storage images and storage images
699        * require identity component mappings. We use 32-bit
700        * instructions to access 64-bit images, so we need a special
701        * case here.
702        *
703        * The zw components are 1,0 so that they can be easily be used
704        * by loads to create the w component, which has to be 0 for
705        * NULL descriptors.
706        */
707       swizzle[0] = PIPE_SWIZZLE_X;
708       swizzle[1] = PIPE_SWIZZLE_Y;
709       swizzle[2] = PIPE_SWIZZLE_1;
710       swizzle[3] = PIPE_SWIZZLE_0;
711    } else if (!mapping) {
712       for (unsigned i = 0; i < 4; i++)
713          swizzle[i] = desc->swizzle[i];
714    } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
715       const unsigned char swizzle_xxxx[4] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0, PIPE_SWIZZLE_1};
716       vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
717    } else {
718       vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
719    }
720 }
721 
722 bool
vi_alpha_is_on_msb(const struct radv_device * device,const VkFormat format)723 vi_alpha_is_on_msb(const struct radv_device *device, const VkFormat format)
724 {
725    if (device->physical_device->rad_info.gfx_level >= GFX11)
726       return false;
727 
728    const struct util_format_description *desc = vk_format_description(format);
729 
730    if (device->physical_device->rad_info.gfx_level >= GFX10 && desc->nr_channels == 1)
731       return desc->swizzle[3] == PIPE_SWIZZLE_X;
732 
733    return radv_translate_colorswap(format, false) <= 1;
734 }
735 
/* Fill the opaque UMD metadata (a texture descriptor plus surface data) for
 * the given plane, so that other processes importing the buffer can
 * reconstruct how to sample it.
 */
static void
radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image, unsigned plane_id,
                           struct radeon_bo_metadata *md)
{
   /* Zero-initialized, i.e. an identity component mapping. */
   static const VkComponentMapping fixedmapping;
   const VkFormat plane_format = radv_image_get_plane_format(device->physical_device, image, plane_id);
   const unsigned plane_width = vk_format_get_plane_width(image->vk.format, plane_id, image->vk.extent.width);
   const unsigned plane_height = vk_format_get_plane_height(image->vk.format, plane_id, image->vk.extent.height);
   struct radeon_surf *surface = &image->planes[plane_id].surface;
   /* Pre-GFX9 (legacy) tiling needs the base mip level's info; GFX9+ does not. */
   const struct legacy_surf_level *base_level_info =
      device->physical_device->rad_info.gfx_level <= GFX8 ? &surface->u.legacy.level[0] : NULL;
   uint32_t desc[8];

   /* Build a descriptor covering all mip levels and array layers of this plane. */
   radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->vk.image_type, plane_format,
                                &fixedmapping, 0, image->vk.mip_levels - 1, 0, image->vk.array_layers - 1, plane_width,
                                plane_height, image->vk.extent.depth, 0.0f, desc, NULL, 0, NULL, NULL);

   radv_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, 0, 0, surface->blk_w, false, false, false,
                                    false, desc, NULL);

   /* Serialize the descriptor (plus extra debug data when RADV_DEBUG_EXTRA_MD
    * is set) into md->metadata / md->size_metadata.
    */
   ac_surface_compute_umd_metadata(&device->physical_device->rad_info, surface, image->vk.mip_levels, desc,
                                   &md->size_metadata, md->metadata,
                                   device->instance->debug_flags & RADV_DEBUG_EXTRA_MD);
}
760 
/* Initialize the winsys buffer metadata describing this image's tiling
 * layout, used when exporting the image to other processes/drivers.
 */
void
radv_init_metadata(struct radv_device *device, struct radv_image *image, struct radeon_bo_metadata *metadata)
{
   /* use plane 0, even when there are multiple planes, to follow radeonsi */
   const unsigned plane_id = 0;
   struct radeon_surf *surface = &image->planes[plane_id].surface;

   memset(metadata, 0, sizeof(*metadata));

   if (device->physical_device->rad_info.gfx_level >= GFX9) {
      /* Prefer the displayable DCC offset when one exists, otherwise the
       * regular metadata offset; both are relative to the BO binding.
       */
      uint64_t dcc_offset =
         image->bindings[0].offset + (surface->display_dcc_offset ? surface->display_dcc_offset : surface->meta_offset);
      metadata->u.gfx9.swizzle_mode = surface->u.gfx9.swizzle_mode;
      metadata->u.gfx9.dcc_offset_256b = dcc_offset >> 8;
      metadata->u.gfx9.dcc_pitch_max = surface->u.gfx9.color.display_dcc_pitch_max;
      metadata->u.gfx9.dcc_independent_64b_blocks = surface->u.gfx9.color.dcc.independent_64B_blocks;
      metadata->u.gfx9.dcc_independent_128b_blocks = surface->u.gfx9.color.dcc.independent_128B_blocks;
      metadata->u.gfx9.dcc_max_compressed_block_size = surface->u.gfx9.color.dcc.max_compressed_block_size;
      metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
   } else {
      /* Legacy (pre-GFX9) tiling description, derived from the base level. */
      metadata->u.legacy.microtile =
         surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ? RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
      metadata->u.legacy.macrotile =
         surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ? RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
      metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
      metadata->u.legacy.bankw = surface->u.legacy.bankw;
      metadata->u.legacy.bankh = surface->u.legacy.bankh;
      metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
      metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
      metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
      /* Row stride in bytes of the base level. */
      metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
      metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
   }
   radv_query_opaque_metadata(device, image, plane_id, metadata);
}
796 
/* Override the base offset (bytes) and row stride (in elements) of plane 0,
 * e.g. for imported images whose layout is dictated by the exporter.
 */
void
radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image, uint64_t offset,
                                  uint32_t stride)
{
   ac_surface_override_offset_stride(&device->physical_device->rad_info, &image->planes[0].surface,
                                     image->vk.array_layers, image->vk.mip_levels, offset, stride);
}
804 
/* Append CMASK storage to the surface of an eligible single-sample image so
 * it can be fast cleared. Bails out when CMASK is absent or already placed,
 * or when the image cannot benefit (DCC present, mipmapped, 3D, sparse,
 * fast clears disabled, or bpe too large).
 */
static void
radv_image_alloc_single_sample_cmask(const struct radv_device *device, const struct radv_image *image,
                                     struct radeon_surf *surf)
{
   if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 || image->vk.mip_levels > 1 ||
       image->vk.extent.depth > 1 || radv_image_has_dcc(image) || !radv_image_use_fast_clear_for_image(device, image) ||
       (image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT))
      return;

   assert(image->vk.samples == 1);

   /* Place the CMASK right after the main surface, honoring its alignment,
    * and grow the total size/alignment accordingly.
    */
   surf->cmask_offset = align64(surf->total_size, 1ull << surf->cmask_alignment_log2);
   surf->total_size = surf->cmask_offset + surf->cmask_size;
   surf->alignment_log2 = MAX2(surf->alignment_log2, surf->cmask_alignment_log2);
}
820 
/* Reserve space at the end of the image for driver-managed metadata values:
 * fast-clear-eliminate predicates, DCC predicates, clear values, and the
 * TC-compat zrange workaround. Each reservation appends to image->size.
 */
static void
radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
{
   /* images with modifiers can be potentially imported */
   if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return;

   /* FCE predicate: 8 bytes per mip level. */
   if (radv_image_has_cmask(image) || (radv_image_has_dcc(image) && !image->support_comp_to_single)) {
      image->fce_pred_offset = image->size;
      image->size += 8 * image->vk.mip_levels;
   }

   /* DCC decompression predicate: 8 bytes per mip level. */
   if (radv_image_use_dcc_predication(device, image)) {
      image->dcc_pred_offset = image->size;
      image->size += 8 * image->vk.mip_levels;
   }

   /* Fast-clear values: 8 bytes per mip level. */
   if ((radv_image_has_dcc(image) && !image->support_comp_to_single) || radv_image_has_cmask(image) ||
       radv_image_has_htile(image)) {
      image->clear_value_offset = image->size;
      image->size += 8 * image->vk.mip_levels;
   }

   if (radv_image_is_tc_compat_htile(image) && device->physical_device->rad_info.has_tc_compat_zrange_bug) {
      /* Metadata for the TC-compatible HTILE hardware bug which
       * have to be fixed by updating ZRANGE_PRECISION when doing
       * fast depth clears to 0.0f.
       */
      image->tc_compat_zrange_offset = image->size;
      image->size += image->vk.mip_levels * 4;
   }
}
853 
/* Determine if the image is affected by the pipe misaligned metadata issue
 * which requires to invalidate L2.
 */
static bool
radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
{
   const struct radeon_info *rad_info = &device->physical_device->rad_info;
   int log2_samples = util_logbase2(image->vk.samples);

   /* Only meaningful on GFX10+, where the L2 metadata pipe alignment rules apply. */
   assert(rad_info->gfx_level >= GFX10);

   /* Check every plane; any misaligned plane makes the whole image misaligned. */
   for (unsigned i = 0; i < image->plane_count; ++i) {
      VkFormat fmt = radv_image_get_plane_format(device->physical_device, image, i);
      int log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
      int log2_bpp_and_samples;

      if (rad_info->gfx_level >= GFX10_3) {
         log2_bpp_and_samples = log2_bpp + log2_samples;
      } else {
         /* NOTE(review): GFX10.0-10.1 special case — depth arrays of >= 8
          * layers are treated as 4 bpp; presumably matches addrlib swizzle
          * selection, confirm against ac_surface.
          */
         if (vk_format_has_depth(image->vk.format) && image->vk.array_layers >= 8) {
            log2_bpp = 2;
         }

         log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
      }

      /* Derive the overlap between pipe alignment and the surface layout
       * from the GB_ADDR_CONFIG register fields.
       */
      int num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config);
      int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);

      if (vk_format_has_depth(image->vk.format)) {
         /* Depth only matters when HTILE is TC-compatible (metadata read by TC). */
         if (radv_image_is_tc_compat_htile(image) && overlap) {
            return true;
         }
      } else {
         int max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config);
         int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
         int samples_overlap = MIN2(log2_samples, overlap);

         /* TODO: It shouldn't be necessary if the image has DCC but
          * not readable by shader.
          */
         if ((radv_image_has_dcc(image) || radv_image_is_tc_compat_cmask(image)) &&
             (samples_overlap > log2_samples_frag_diff)) {
            return true;
         }
      }
   }

   return false;
}
904 
905 static bool
radv_image_is_l2_coherent(const struct radv_device * device,const struct radv_image * image)906 radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
907 {
908    if (device->physical_device->rad_info.gfx_level >= GFX10) {
909       return !device->physical_device->rad_info.tcc_rb_non_coherent && !radv_image_is_pipe_misaligned(device, image);
910    } else if (device->physical_device->rad_info.gfx_level == GFX9) {
911       if (image->vk.samples == 1 &&
912           (image->vk.usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
913           !vk_format_has_stencil(image->vk.format)) {
914          /* Single-sample color and single-sample depth
915           * (not stencil) are coherent with shaders on
916           * GFX9.
917           */
918          return true;
919       }
920    }
921 
922    return false;
923 }
924 
925 /**
926  * Determine if the given image can be fast cleared.
927  */
928 bool
radv_image_can_fast_clear(const struct radv_device * device,const struct radv_image * image)929 radv_image_can_fast_clear(const struct radv_device *device, const struct radv_image *image)
930 {
931    if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
932       return false;
933 
934    if (vk_format_is_color(image->vk.format)) {
935       if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
936          return false;
937 
938       /* RB+ doesn't work with CMASK fast clear on Stoney. */
939       if (!radv_image_has_dcc(image) && device->physical_device->rad_info.family == CHIP_STONEY)
940          return false;
941 
942       /* Fast-clears with CMASK aren't supported for 128-bit formats. */
943       if (radv_image_has_cmask(image) && vk_format_get_blocksizebits(image->vk.format) > 64)
944          return false;
945    } else {
946       if (!radv_image_has_htile(image))
947          return false;
948    }
949 
950    /* Do not fast clears 3D images. */
951    if (image->vk.image_type == VK_IMAGE_TYPE_3D)
952       return false;
953 
954    return true;
955 }
956 
957 /**
958  * Determine if the given image can be fast cleared using comp-to-single.
959  */
960 static bool
radv_image_use_comp_to_single(const struct radv_device * device,const struct radv_image * image)961 radv_image_use_comp_to_single(const struct radv_device *device, const struct radv_image *image)
962 {
963    /* comp-to-single is only available for GFX10+. */
964    if (device->physical_device->rad_info.gfx_level < GFX10)
965       return false;
966 
967    /* If the image can't be fast cleared, comp-to-single can't be used. */
968    if (!radv_image_can_fast_clear(device, image))
969       return false;
970 
971    /* If the image doesn't have DCC, it can't be fast cleared using comp-to-single */
972    if (!radv_image_has_dcc(image))
973       return false;
974 
975    /* It seems 8bpp and 16bpp require RB+ to work. */
976    unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk.format);
977    if (bytes_per_pixel <= 2 && !device->physical_device->rad_info.rbplus_allowed)
978       return false;
979 
980    return true;
981 }
982 
983 static unsigned
radv_get_internal_plane_count(const struct radv_physical_device * pdev,VkFormat fmt)984 radv_get_internal_plane_count(const struct radv_physical_device *pdev, VkFormat fmt)
985 {
986    if (radv_is_format_emulated(pdev, fmt))
987       return 2;
988    return vk_format_get_plane_count(fmt);
989 }
990 
/* Reset the image layout state before (re)computing it, clearing each plane's
 * surface while preserving only its flags and modifier.
 */
static void
radv_image_reset_layout(const struct radv_physical_device *pdev, struct radv_image *image)
{
   image->size = 0;
   image->alignment = 1;

   image->tc_compatible_cmask = 0;
   image->fce_pred_offset = image->dcc_pred_offset = 0;
   image->clear_value_offset = image->tc_compat_zrange_offset = 0;

   unsigned plane_count = radv_get_internal_plane_count(pdev, image->vk.format);
   for (unsigned i = 0; i < plane_count; ++i) {
      VkFormat format = radv_image_get_plane_format(pdev, image, i);
      /* Surface layout only cares about the depth part of depth/stencil. */
      if (vk_format_has_depth(format))
         format = vk_format_depth_only(format);

      /* Save flags/modifier across the memset below, then restore them. */
      uint64_t flags = image->planes[i].surface.flags;
      uint64_t modifier = image->planes[i].surface.modifier;
      memset(image->planes + i, 0, sizeof(image->planes[i]));

      image->planes[i].surface.flags = flags;
      image->planes[i].surface.modifier = modifier;
      image->planes[i].surface.blk_w = vk_format_get_blockwidth(format);
      image->planes[i].surface.blk_h = vk_format_get_blockheight(format);
      image->planes[i].surface.bpe = vk_format_get_blocksize(format);

      /* align byte per element on dword */
      if (image->planes[i].surface.bpe == 3) {
         image->planes[i].surface.bpe = 4;
      }
   }
}
1023 
1024 struct ac_surf_info
radv_get_ac_surf_info(struct radv_device * device,const struct radv_image * image)1025 radv_get_ac_surf_info(struct radv_device *device, const struct radv_image *image)
1026 {
1027    struct ac_surf_info info;
1028 
1029    memset(&info, 0, sizeof(info));
1030 
1031    info.width = image->vk.extent.width;
1032    info.height = image->vk.extent.height;
1033    info.depth = image->vk.extent.depth;
1034    info.samples = image->vk.samples;
1035    info.storage_samples = image->vk.samples;
1036    info.array_size = image->vk.array_layers;
1037    info.levels = image->vk.mip_levels;
1038    info.num_channels = vk_format_get_nr_components(image->vk.format);
1039 
1040    if (!vk_format_is_depth_or_stencil(image->vk.format) && !image->shareable &&
1041        !(image->vk.create_flags & (VK_IMAGE_CREATE_SPARSE_ALIASED_BIT | VK_IMAGE_CREATE_ALIAS_BIT)) &&
1042        image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
1043       info.surf_index = &device->image_mrt_offset_counter;
1044    }
1045 
1046    return info;
1047 }
1048 
/* Compute the full memory layout of an image: per-plane surfaces, offsets and
 * strides (possibly dictated by an explicit DRM modifier), optional CMASK,
 * derived properties (TC-compat CMASK, L2 coherency, comp-to-single) and the
 * trailing metadata values.
 *
 * Returns VK_SUCCESS, or VK_ERROR_INVALID_EXTERNAL_HANDLE /
 * VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT when imported
 * metadata or an explicit modifier layout is inconsistent.
 */
VkResult
radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
                         const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
                         const struct VkVideoProfileListInfoKHR *profile_list, struct radv_image *image)
{
   /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
    * common internal case. */
   create_info.vk_info = NULL;

   struct ac_surf_info image_info = radv_get_ac_surf_info(device, image);
   VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
   if (result != VK_SUCCESS)
      return result;

   assert(!mod_info || mod_info->drmFormatModifierPlaneCount >= image->plane_count);

   radv_image_reset_layout(device->physical_device, image);

   /*
    * Due to how the decoder works, the user can't supply an oversized image, because if it attempts
    * to sample it later with a linear filter, it will get garbage after the height it wants,
    * so we let the user specify the width/height unaligned, and align them preallocation.
    */
   if (image->vk.usage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR)) {
      assert(profile_list);
      uint32_t width_align, height_align;
      radv_video_get_profile_alignments(device->physical_device, profile_list, &width_align, &height_align);
      image_info.width = align(image_info.width, width_align);
      image_info.height = align(image_info.height, height_align);

      if (radv_has_uvd(device->physical_device) && image->vk.usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) {
         /* UVD and kernel demand a full DPB allocation. */
         image_info.array_size = MIN2(16, image_info.array_size);
      }
   }

   unsigned plane_count = radv_get_internal_plane_count(device->physical_device, image->vk.format);
   for (unsigned plane = 0; plane < plane_count; ++plane) {
      struct ac_surf_info info = image_info;
      uint64_t offset;
      unsigned stride;

      /* Subsampled planes (e.g. chroma in YCbCr) have reduced dimensions. */
      info.width = vk_format_get_plane_width(image->vk.format, plane, info.width);
      info.height = vk_format_get_plane_height(image->vk.format, plane, info.height);

      /* Multi-plane images never carry compression metadata. */
      if (create_info.no_metadata_planes || plane_count > 1) {
         image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE;
      }

      device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);

      if (plane == 0) {
         /* The late DCC decision can still disable DCC chosen at surface init. */
         if (!radv_use_dcc_for_image_late(device, image))
            ac_surface_zero_dcc_fields(&image->planes[0].surface);
      }

      /* Imported BO metadata (non-modifier path) must match the computed surface. */
      if (create_info.bo_metadata && !mod_info &&
          !ac_surface_apply_umd_metadata(&device->physical_device->rad_info, &image->planes[plane].surface,
                                         image->vk.samples, image->vk.mip_levels,
                                         create_info.bo_metadata->size_metadata, create_info.bo_metadata->metadata))
         return VK_ERROR_INVALID_EXTERNAL_HANDLE;

      if (!create_info.no_metadata_planes && !create_info.bo_metadata && plane_count == 1 && !mod_info)
         radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface);

      if (mod_info) {
         /* Row pitch must be non-zero and a whole number of elements. */
         if (mod_info->pPlaneLayouts[plane].rowPitch % image->planes[plane].surface.bpe ||
             !mod_info->pPlaneLayouts[plane].rowPitch)
            return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

         offset = mod_info->pPlaneLayouts[plane].offset;
         stride = mod_info->pPlaneLayouts[plane].rowPitch / image->planes[plane].surface.bpe;
      } else {
         offset = image->disjoint ? 0 : align64(image->size, 1ull << image->planes[plane].surface.alignment_log2);
         stride = 0; /* 0 means no override */
      }

      if (!ac_surface_override_offset_stride(&device->physical_device->rad_info, &image->planes[plane].surface,
                                             image->vk.array_layers, image->vk.mip_levels, offset, stride))
         return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

      /* Validate DCC offsets in modifier layout. */
      if (plane_count == 1 && mod_info) {
         unsigned mem_planes = ac_surface_get_nplanes(&image->planes[plane].surface);
         if (mod_info->drmFormatModifierPlaneCount != mem_planes)
            return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

         for (unsigned i = 1; i < mem_planes; ++i) {
            if (ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level, &image->planes[plane].surface,
                                            i, 0) != mod_info->pPlaneLayouts[i].offset)
               return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
         }
      }

      image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size);
      image->alignment = MAX2(image->alignment, 1 << image->planes[plane].surface.alignment_log2);

      image->planes[plane].format = radv_image_get_plane_format(device->physical_device, image, plane);
   }

   image->tc_compatible_cmask = radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image);

   image->l2_coherent = radv_image_is_l2_coherent(device, image);

   image->support_comp_to_single = radv_image_use_comp_to_single(device, image);

   radv_image_alloc_values(device, image);

   assert(image->planes[0].surface.surf_size);
   assert(image->planes[0].surface.modifier == DRM_FORMAT_MOD_INVALID ||
          ac_modifier_has_dcc(image->planes[0].surface.modifier) == radv_image_has_dcc(image));
   return VK_SUCCESS;
}
1162 
/* Free all resources owned by an image: the sparse backing BO (if any), any
 * dedicated memory the image owns, and the image object itself.
 */
static void
radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAllocator, struct radv_image *image)
{
   /* Sparse images own a virtual BO created in radv_image_create. */
   if ((image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bindings[0].bo) {
      radv_rmv_log_bo_destroy(device, image->bindings[0].bo);
      device->ws->buffer_destroy(device->ws, image->bindings[0].bo);
   }

   /* Memory dedicated to this image (e.g. Android hardware buffers). */
   if (image->owned_memory != VK_NULL_HANDLE) {
      RADV_FROM_HANDLE(radv_device_memory, mem, image->owned_memory);
      radv_free_memory(device, pAllocator, mem);
   }

   radv_rmv_log_resource_destroy(device, (uint64_t)radv_image_to_handle(image));
   vk_image_finish(&image->vk);
   vk_free2(&device->vk.alloc, pAllocator, image);
}
1180 
/* Dump an image's layout (overall size/alignment plus per-plane surface
 * details) to stderr; used with the RADV_DEBUG=img option.
 */
static void
radv_image_print_info(struct radv_device *device, struct radv_image *image)
{
   fprintf(stderr, "Image:\n");
   fprintf(stderr,
           "  Info: size=%" PRIu64 ", alignment=%" PRIu32 ", "
           "width=%" PRIu32 ", height=%" PRIu32 ", depth=%" PRIu32 ", "
           "array_size=%" PRIu32 ", levels=%" PRIu32 "\n",
           image->size, image->alignment, image->vk.extent.width, image->vk.extent.height, image->vk.extent.depth,
           image->vk.array_layers, image->vk.mip_levels);
   for (unsigned i = 0; i < image->plane_count; ++i) {
      const struct radv_image_plane *plane = &image->planes[i];
      const struct radeon_surf *surf = &plane->surface;
      const struct util_format_description *desc = vk_format_description(plane->format);
      uint64_t offset = ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level, &plane->surface, 0, 0);

      fprintf(stderr, "  Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset);

      ac_surface_print_info(stderr, &device->physical_device->rad_info, surf);
   }
}
1202 
/* Pick a DRM format modifier for the image from the application-provided
 * list: walk the modifiers the device supports (NOTE(review): presumably
 * returned in preference order by ac_get_supported_modifiers — confirm) and
 * return the first one the application also allows.
 */
static uint64_t
radv_select_modifier(const struct radv_device *dev, VkFormat format,
                     const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list)
{
   const struct radv_physical_device *pdev = dev->physical_device;
   unsigned mod_count;

   assert(mod_list->drmFormatModifierCount);

   /* We can allow everything here as it does not affect order and the application
    * is only allowed to specify modifiers that we support. */
   const struct ac_modifier_options modifier_options = {
      .dcc = true,
      .dcc_retile = true,
   };

   /* First call queries the count only. */
   ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format), &mod_count, NULL);

   uint64_t *mods = calloc(mod_count, sizeof(*mods));

   /* If allocations fail, fall back to a dumber solution. */
   if (!mods)
      return mod_list->pDrmFormatModifiers[0];

   ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format), &mod_count, mods);

   /* Return the first supported modifier that the app also listed. */
   for (unsigned i = 0; i < mod_count; ++i) {
      for (uint32_t j = 0; j < mod_list->drmFormatModifierCount; ++j) {
         if (mods[i] == mod_list->pDrmFormatModifiers[j]) {
            free(mods);
            return mod_list->pDrmFormatModifiers[j];
         }
      }
   }
   /* The spec requires every listed modifier to be one we advertised. */
   unreachable("App specified an invalid modifier");
}
1239 
/* Create a radv_image: allocate the object, record sharing/queue-family
 * state, select a DRM modifier if requested, compute per-plane surface flags,
 * and (except for deferred Android AHB images) compute the full layout and,
 * for sparse images, create the backing virtual BO.
 */
VkResult
radv_image_create(VkDevice _device, const struct radv_image_create_info *create_info,
                  const VkAllocationCallbacks *alloc, VkImage *pImage, bool is_internal)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
   uint64_t modifier = DRM_FORMAT_MOD_INVALID;
   struct radv_image *image = NULL;
   /* Android external formats may override the Vulkan format. */
   VkFormat format = radv_select_android_external_format(pCreateInfo->pNext, pCreateInfo->format);
   const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
   const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT);
   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
   const struct VkVideoProfileListInfoKHR *profile_list =
      vk_find_struct_const(pCreateInfo->pNext, VIDEO_PROFILE_LIST_INFO_KHR);

   unsigned plane_count = radv_get_internal_plane_count(device->physical_device, format);

   /* The plane array is allocated inline after the image struct. */
   const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;

   image = vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!image)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_image_init(&device->vk, &image->vk, pCreateInfo);

   image->plane_count = vk_format_get_plane_count(format);
   image->disjoint = image->plane_count > 1 && pCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT;

   image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
   if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
      /* External/foreign queues can be any queue family; mark them all. */
      for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
         if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
             pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
            image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
         else
            image->queue_family_mask |=
               1u << vk_queue_to_radv(device->physical_device, pCreateInfo->pQueueFamilyIndices[i]);

      /* This queue never really accesses the image. */
      image->queue_family_mask &= ~(1u << RADV_QUEUE_SPARSE);
   }

   const VkExternalMemoryImageCreateInfo *external_info =
      vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO);

   /* Any external-memory image is considered shareable. */
   image->shareable = external_info;

   if (mod_list)
      modifier = radv_select_modifier(device, format, mod_list);
   else if (explicit_mod)
      modifier = explicit_mod->drmFormatModifier;

   for (unsigned plane = 0; plane < plane_count; ++plane) {
      image->planes[plane].surface.flags = radv_get_surface_flags(device, image, plane, pCreateInfo, format);
      image->planes[plane].surface.modifier = modifier;
   }

   if (image->vk.external_handle_types & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID) {
#if DETECT_OS_ANDROID
      image->vk.ahb_format = radv_ahb_format_for_vk_format(image->vk.format);
#endif

      /* AHB images defer layout computation until the buffer is bound. */
      *pImage = radv_image_to_handle(image);
      assert(!(image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
      return VK_SUCCESS;
   }

   VkResult result = radv_image_create_layout(device, *create_info, explicit_mod, profile_list, image);
   if (result != VK_SUCCESS) {
      radv_destroy_image(device, alloc, image);
      return result;
   }

   if (image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
      /* Sparse images get a page-aligned virtual BO that pages are later
       * bound into.
       */
      image->alignment = MAX2(image->alignment, 4096);
      image->size = align64(image->size, image->alignment);
      image->bindings[0].offset = 0;

      result = device->ws->buffer_create(device->ws, image->size, image->alignment, 0, RADEON_FLAG_VIRTUAL,
                                         RADV_BO_PRIORITY_VIRTUAL, 0, &image->bindings[0].bo);
      if (result != VK_SUCCESS) {
         radv_destroy_image(device, alloc, image);
         return vk_error(device, result);
      }
      radv_rmv_log_bo_allocate(device, image->bindings[0].bo, image->size, true);
   }

   if (device->instance->debug_flags & RADV_DEBUG_IMG) {
      radv_image_print_info(device, image);
   }

   *pImage = radv_image_to_handle(image);

   radv_rmv_log_image_create(device, pCreateInfo, is_internal, *pImage);
   if (image->bindings[0].bo)
      radv_rmv_log_image_bind(device, *pImage);
   return VK_SUCCESS;
}
1340 
1341 unsigned
radv_plane_from_aspect(VkImageAspectFlags mask)1342 radv_plane_from_aspect(VkImageAspectFlags mask)
1343 {
1344    switch (mask) {
1345    case VK_IMAGE_ASPECT_PLANE_1_BIT:
1346    case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
1347       return 1;
1348    case VK_IMAGE_ASPECT_PLANE_2_BIT:
1349    case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
1350       return 2;
1351    case VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT:
1352       return 3;
1353    default:
1354       return 0;
1355    }
1356 }
1357 
1358 VkFormat
radv_get_aspect_format(struct radv_image * image,VkImageAspectFlags mask)1359 radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
1360 {
1361    switch (mask) {
1362    case VK_IMAGE_ASPECT_PLANE_0_BIT:
1363       return image->planes[0].format;
1364    case VK_IMAGE_ASPECT_PLANE_1_BIT:
1365       return image->planes[1].format;
1366    case VK_IMAGE_ASPECT_PLANE_2_BIT:
1367       return image->planes[2].format;
1368    case VK_IMAGE_ASPECT_STENCIL_BIT:
1369       return vk_format_stencil_only(image->vk.format);
1370    case VK_IMAGE_ASPECT_DEPTH_BIT:
1371       return vk_format_depth_only(image->vk.format);
1372    case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
1373       return vk_format_depth_only(image->vk.format);
1374    default:
1375       return image->vk.format;
1376    }
1377 }
1378 
1379 bool
radv_layout_is_htile_compressed(const struct radv_device * device,const struct radv_image * image,VkImageLayout layout,unsigned queue_mask)1380 radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image, VkImageLayout layout,
1381                                 unsigned queue_mask)
1382 {
1383    /* Don't compress exclusive images used on transfer queues when SDMA doesn't support HTILE.
1384     * Note that HTILE is already disabled on concurrent images when not supported.
1385     */
1386    if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !device->physical_device->rad_info.sdma_supports_compression)
1387       return false;
1388 
1389    switch (layout) {
1390    case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
1391    case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL:
1392    case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL:
1393    case VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL:
1394       return radv_image_has_htile(image);
1395    case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
1396       return radv_image_is_tc_compat_htile(image) ||
1397              (radv_image_has_htile(image) && queue_mask == (1u << RADV_QUEUE_GENERAL));
1398    case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR:
1399    case VK_IMAGE_LAYOUT_GENERAL:
1400       /* It should be safe to enable TC-compat HTILE with
1401        * VK_IMAGE_LAYOUT_GENERAL if we are not in a render loop and
1402        * if the image doesn't have the storage bit set. This
1403        * improves performance for apps that use GENERAL for the main
1404        * depth pass because this allows compression and this reduces
1405        * the number of decompressions from/to GENERAL.
1406        */
1407       if (radv_image_is_tc_compat_htile(image) && queue_mask & (1u << RADV_QUEUE_GENERAL) &&
1408           !device->instance->drirc.disable_tc_compat_htile_in_general) {
1409          return true;
1410       } else {
1411          return false;
1412       }
1413    case VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT:
1414       /* Do not compress HTILE with feedback loops because we can't read&write it without
1415        * introducing corruption.
1416        */
1417       return false;
1418    case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
1419    case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL:
1420       if (radv_image_is_tc_compat_htile(image) ||
1421           (radv_image_has_htile(image) &&
1422            !(image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))) {
1423          /* Keep HTILE compressed if the image is only going to
1424           * be used as a depth/stencil read-only attachment.
1425           */
1426          return true;
1427       } else {
1428          return false;
1429       }
1430       break;
1431    default:
1432       return radv_image_is_tc_compat_htile(image);
1433    }
1434 }
1435 
1436 bool
radv_layout_can_fast_clear(const struct radv_device * device,const struct radv_image * image,unsigned level,VkImageLayout layout,unsigned queue_mask)1437 radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image, unsigned level,
1438                            VkImageLayout layout, unsigned queue_mask)
1439 {
1440    if (radv_dcc_enabled(image, level) && !radv_layout_dcc_compressed(device, image, level, layout, queue_mask))
1441       return false;
1442 
1443    if (!(image->vk.usage & RADV_IMAGE_USAGE_WRITE_BITS))
1444       return false;
1445 
1446    if (layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && layout != VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL)
1447       return false;
1448 
1449    /* Exclusive images with CMASK or DCC can always be fast-cleared on the gfx queue. Concurrent
1450     * images can only be fast-cleared if comp-to-single is supported because we don't yet support
1451     * FCE on the compute queue.
1452     */
1453    return queue_mask == (1u << RADV_QUEUE_GENERAL) || radv_image_use_comp_to_single(device, image);
1454 }
1455 
1456 bool
radv_layout_dcc_compressed(const struct radv_device * device,const struct radv_image * image,unsigned level,VkImageLayout layout,unsigned queue_mask)1457 radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image, unsigned level,
1458                            VkImageLayout layout, unsigned queue_mask)
1459 {
1460    if (!radv_dcc_enabled(image, level))
1461       return false;
1462 
1463    if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && queue_mask & (1u << RADV_QUEUE_FOREIGN))
1464       return true;
1465 
1466    /* If the image is read-only, we can always just keep it compressed */
1467    if (!(image->vk.usage & RADV_IMAGE_USAGE_WRITE_BITS))
1468       return true;
1469 
1470    /* Don't compress compute transfer dst when image stores are not supported. */
1471    if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) &&
1472        (queue_mask & (1u << RADV_QUEUE_COMPUTE)) && !radv_image_use_dcc_image_stores(device, image))
1473       return false;
1474 
1475    /* Don't compress exclusive images used on transfer queues when SDMA doesn't support DCC.
1476     * Note that DCC is already disabled on concurrent images when not supported.
1477     */
1478    if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !device->physical_device->rad_info.sdma_supports_compression)
1479       return false;
1480 
1481    if (layout == VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT) {
1482       /* Do not compress DCC with feedback loops because we can't read&write it without introducing
1483        * corruption.
1484        */
1485       return false;
1486    }
1487 
1488    return device->physical_device->rad_info.gfx_level >= GFX10 || layout != VK_IMAGE_LAYOUT_GENERAL;
1489 }
1490 
1491 enum radv_fmask_compression
radv_layout_fmask_compression(const struct radv_device * device,const struct radv_image * image,VkImageLayout layout,unsigned queue_mask)1492 radv_layout_fmask_compression(const struct radv_device *device, const struct radv_image *image, VkImageLayout layout,
1493                               unsigned queue_mask)
1494 {
1495    if (!radv_image_has_fmask(image))
1496       return RADV_FMASK_COMPRESSION_NONE;
1497 
1498    if (layout == VK_IMAGE_LAYOUT_GENERAL)
1499       return RADV_FMASK_COMPRESSION_NONE;
1500 
1501    /* Don't compress compute transfer dst because image stores ignore FMASK and it needs to be
1502     * expanded before.
1503     */
1504    if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
1505       return RADV_FMASK_COMPRESSION_NONE;
1506 
1507    /* Compress images if TC-compat CMASK is enabled. */
1508    if (radv_image_is_tc_compat_cmask(image))
1509       return RADV_FMASK_COMPRESSION_FULL;
1510 
1511    switch (layout) {
1512    case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
1513    case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
1514       /* Don't compress images but no need to expand FMASK. */
1515       return RADV_FMASK_COMPRESSION_PARTIAL;
1516    case VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT:
1517       /* Don't compress images that are in feedback loops. */
1518       return RADV_FMASK_COMPRESSION_NONE;
1519    default:
1520       /* Don't compress images that are concurrent. */
1521       return queue_mask == (1u << RADV_QUEUE_GENERAL) ? RADV_FMASK_COMPRESSION_FULL : RADV_FMASK_COMPRESSION_NONE;
1522    }
1523 }
1524 
1525 unsigned
radv_image_queue_family_mask(const struct radv_image * image,enum radv_queue_family family,enum radv_queue_family queue_family)1526 radv_image_queue_family_mask(const struct radv_image *image, enum radv_queue_family family,
1527                              enum radv_queue_family queue_family)
1528 {
1529    if (!image->exclusive)
1530       return image->queue_family_mask;
1531    if (family == RADV_QUEUE_FOREIGN)
1532       return ((1u << RADV_MAX_QUEUE_FAMILIES) - 1u) | (1u << RADV_QUEUE_FOREIGN);
1533    if (family == RADV_QUEUE_IGNORED)
1534       return 1u << queue_family;
1535    return 1u << family;
1536 }
1537 
1538 bool
radv_image_is_renderable(const struct radv_device * device,const struct radv_image * image)1539 radv_image_is_renderable(const struct radv_device *device, const struct radv_image *image)
1540 {
1541    if (image->vk.format == VK_FORMAT_R32G32B32_UINT || image->vk.format == VK_FORMAT_R32G32B32_SINT ||
1542        image->vk.format == VK_FORMAT_R32G32B32_SFLOAT)
1543       return false;
1544 
1545    if (device->physical_device->rad_info.gfx_level >= GFX9 && image->vk.image_type == VK_IMAGE_TYPE_3D &&
1546        vk_format_get_blocksizebits(image->vk.format) == 128 && vk_format_is_compressed(image->vk.format))
1547       return false;
1548 
1549    if (image->planes[0].surface.flags & RADEON_SURF_NO_RENDER_TARGET)
1550       return false;
1551 
1552    return true;
1553 }
1554 
1555 unsigned
radv_tile_mode_index(const struct radv_image_plane * plane,unsigned level,bool stencil)1556 radv_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
1557 {
1558    if (stencil)
1559       return plane->surface.u.legacy.zs.stencil_tiling_index[level];
1560    else
1561       return plane->surface.u.legacy.tiling_index[level];
1562 }
1563 
1564 VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateImage(VkDevice _device,const VkImageCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkImage * pImage)1565 radv_CreateImage(VkDevice _device, const VkImageCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator,
1566                  VkImage *pImage)
1567 {
1568 #if DETECT_OS_ANDROID
1569    const VkNativeBufferANDROID *gralloc_info = vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
1570 
1571    if (gralloc_info)
1572       return radv_image_from_gralloc(_device, pCreateInfo, gralloc_info, pAllocator, pImage);
1573 #endif
1574 
1575 #ifdef RADV_USE_WSI_PLATFORM
1576    /* Ignore swapchain creation info on Android. Since we don't have an implementation in Mesa,
1577     * we're guaranteed to access an Android object incorrectly.
1578     */
1579    RADV_FROM_HANDLE(radv_device, device, _device);
1580    const VkImageSwapchainCreateInfoKHR *swapchain_info =
1581       vk_find_struct_const(pCreateInfo->pNext, IMAGE_SWAPCHAIN_CREATE_INFO_KHR);
1582    if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) {
1583       return wsi_common_create_swapchain_image(device->physical_device->vk.wsi_device, pCreateInfo,
1584                                                swapchain_info->swapchain, pImage);
1585    }
1586 #endif
1587 
1588    const struct wsi_image_create_info *wsi_info = vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
1589    bool scanout = wsi_info && wsi_info->scanout;
1590    bool prime_blit_src = wsi_info && wsi_info->blit_src;
1591 
1592    return radv_image_create(_device,
1593                             &(struct radv_image_create_info){
1594                                .vk_info = pCreateInfo,
1595                                .scanout = scanout,
1596                                .prime_blit_src = prime_blit_src,
1597                             },
1598                             pAllocator, pImage, false);
1599 }
1600 
1601 VKAPI_ATTR void VKAPI_CALL
radv_DestroyImage(VkDevice _device,VkImage _image,const VkAllocationCallbacks * pAllocator)1602 radv_DestroyImage(VkDevice _device, VkImage _image, const VkAllocationCallbacks *pAllocator)
1603 {
1604    RADV_FROM_HANDLE(radv_device, device, _device);
1605    RADV_FROM_HANDLE(radv_image, image, _image);
1606 
1607    if (!image)
1608       return;
1609 
1610    radv_destroy_image(device, pAllocator, image);
1611 }
1612 
/* vkBindImageMemory2: attach device memory to one or more images.
 *
 * Three binding flavours are handled per entry:
 *  - swapchain images alias the BO/offset of the referenced swapchain image,
 *  - disjoint multi-planar images bind one BO/offset per plane aspect,
 *  - regular images use binding slot 0.
 */
VKAPI_ATTR VkResult VKAPI_CALL
radv_BindImageMemory2(VkDevice _device, uint32_t bindInfoCount, const VkBindImageMemoryInfo *pBindInfos)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

   for (uint32_t i = 0; i < bindInfoCount; ++i) {
      RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
      RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
      /* Const is cast away because the app-provided pResult is written below. */
      VkBindMemoryStatusKHR *status = (void *)vk_find_struct_const(&pBindInfos[i], BIND_MEMORY_STATUS_KHR);

      if (status)
         *status->pResult = VK_SUCCESS;

         /* Ignore this struct on Android, we cannot access swapchain structures there. */
#ifdef RADV_USE_WSI_PLATFORM
      const VkBindImageMemorySwapchainInfoKHR *swapchain_info =
         vk_find_struct_const(pBindInfos[i].pNext, BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHR);

      if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) {
         /* Alias the binding of the referenced swapchain image. */
         struct radv_image *swapchain_img =
            radv_image_from_handle(wsi_common_get_image(swapchain_info->swapchain, swapchain_info->imageIndex));

         image->bindings[0].bo = swapchain_img->bindings[0].bo;
         image->bindings[0].offset = swapchain_img->bindings[0].offset;
         continue;
      }
#endif

      if (mem->alloc_size) {
         /* Sanity check: the memory object must be large enough to hold the
          * image at the requested offset.
          */
         VkImageMemoryRequirementsInfo2 info = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
            .image = pBindInfos[i].image,
         };
         VkMemoryRequirements2 reqs = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
         };

         radv_GetImageMemoryRequirements2(_device, &info, &reqs);

         if (pBindInfos[i].memoryOffset + reqs.memoryRequirements.size > mem->alloc_size) {
            if (status)
               *status->pResult = VK_ERROR_UNKNOWN;
            return vk_errorf(device, VK_ERROR_UNKNOWN, "Device memory object too small for the image.\n");
         }
      }

      if (image->disjoint) {
         /* NOTE(review): the Vulkan spec requires VkBindImagePlaneMemoryInfo
          * in the chain for disjoint images, so plane_info is assumed
          * non-NULL here — confirm validation covers this.
          */
         const VkBindImagePlaneMemoryInfo *plane_info =
            vk_find_struct_const(pBindInfos[i].pNext, BIND_IMAGE_PLANE_MEMORY_INFO);

         switch (plane_info->planeAspect) {
         case VK_IMAGE_ASPECT_PLANE_0_BIT:
            image->bindings[0].bo = mem->bo;
            image->bindings[0].offset = pBindInfos[i].memoryOffset;
            break;
         case VK_IMAGE_ASPECT_PLANE_1_BIT:
            image->bindings[1].bo = mem->bo;
            image->bindings[1].offset = pBindInfos[i].memoryOffset;
            break;
         case VK_IMAGE_ASPECT_PLANE_2_BIT:
            image->bindings[2].bo = mem->bo;
            image->bindings[2].offset = pBindInfos[i].memoryOffset;
            break;
         default:
            break;
         }
      } else {
         /* Non-disjoint: a single binding covers all planes. */
         image->bindings[0].bo = mem->bo;
         image->bindings[0].offset = pBindInfos[i].memoryOffset;
      }
      radv_rmv_log_image_bind(device, pBindInfos[i].image);
   }
   return VK_SUCCESS;
}
1687 
/* vkGetImageSubresourceLayout2KHR: report the memory layout (offset,
 * pitches, size) of one mip level / array layer, plus optional compression
 * properties.
 *
 * Three layout paths: DRM-format-modifier images (per-memory-plane layout
 * queried from ac_surface), GFX9+ native surfaces, and GFX6-8 legacy tiling.
 */
VKAPI_ATTR void VKAPI_CALL
radv_GetImageSubresourceLayout2KHR(VkDevice _device, VkImage _image, const VkImageSubresource2KHR *pSubresource,
                                   VkSubresourceLayout2KHR *pLayout)
{
   RADV_FROM_HANDLE(radv_image, image, _image);
   RADV_FROM_HANDLE(radv_device, device, _device);
   int level = pSubresource->imageSubresource.mipLevel;
   int layer = pSubresource->imageSubresource.arrayLayer;

   /* For multi-planar formats, the aspect selects the format plane. */
   const unsigned plane_count = vk_format_get_plane_count(image->vk.format);
   unsigned plane_id = 0;
   if (plane_count > 1)
      plane_id = radv_plane_from_aspect(pSubresource->imageSubresource.aspectMask);

   struct radv_image_plane *plane = &image->planes[plane_id];
   struct radeon_surf *surface = &plane->surface;

   if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && plane_count == 1) {
      /* Single-plane modifier image: the aspect names a *memory* plane
       * (e.g. a metadata plane), not a format plane.
       */
      unsigned mem_plane_id = radv_plane_from_aspect(pSubresource->imageSubresource.aspectMask);

      /* Modifier images are restricted to a single mip level and layer. */
      assert(level == 0);
      assert(layer == 0);

      pLayout->subresourceLayout.offset =
         ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level, surface, mem_plane_id, 0);
      pLayout->subresourceLayout.rowPitch =
         ac_surface_get_plane_stride(device->physical_device->rad_info.gfx_level, surface, mem_plane_id, level);
      pLayout->subresourceLayout.arrayPitch = 0;
      pLayout->subresourceLayout.depthPitch = 0;
      pLayout->subresourceLayout.size = ac_surface_get_plane_size(surface, mem_plane_id);
   } else if (device->physical_device->rad_info.gfx_level >= GFX9) {
      /* GFX9+: only linear surfaces carry per-level offsets. */
      uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;

      pLayout->subresourceLayout.offset =
         ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level, &plane->surface, 0, layer) +
         level_offset;
      if (image->vk.format == VK_FORMAT_R32G32B32_UINT || image->vk.format == VK_FORMAT_R32G32B32_SINT ||
          image->vk.format == VK_FORMAT_R32G32B32_SFLOAT) {
         /* Adjust the number of bytes between each row because
          * the pitch is actually the number of components per
          * row.
          */
         pLayout->subresourceLayout.rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
      } else {
         uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;

         assert(util_is_power_of_two_nonzero(surface->bpe));
         pLayout->subresourceLayout.rowPitch = pitch * surface->bpe;
      }

      pLayout->subresourceLayout.arrayPitch = surface->u.gfx9.surf_slice_size;
      pLayout->subresourceLayout.depthPitch = surface->u.gfx9.surf_slice_size;
      pLayout->subresourceLayout.size = surface->u.gfx9.surf_slice_size;
      /* 3D images: total size spans the (mip-adjusted) depth. */
      if (image->vk.image_type == VK_IMAGE_TYPE_3D)
         pLayout->subresourceLayout.size *= u_minify(image->vk.extent.depth, level);
   } else {
      /* GFX6-8 legacy tiling: offsets are stored in 256-byte units and
       * slice sizes in dwords, hence the *256 and *4 conversions.
       */
      pLayout->subresourceLayout.offset = (uint64_t)surface->u.legacy.level[level].offset_256B * 256 +
                                          (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
      pLayout->subresourceLayout.rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
      pLayout->subresourceLayout.arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      pLayout->subresourceLayout.depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      pLayout->subresourceLayout.size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      if (image->vk.image_type == VK_IMAGE_TYPE_3D)
         pLayout->subresourceLayout.size *= u_minify(image->vk.extent.depth, level);
   }

   /* Optionally report compression state: HTILE for depth images, DCC
    * otherwise. RADV never uses fixed-rate compression.
    */
   VkImageCompressionPropertiesEXT *image_compression_props =
      vk_find_struct(pLayout->pNext, IMAGE_COMPRESSION_PROPERTIES_EXT);
   if (image_compression_props) {
      image_compression_props->imageCompressionFixedRateFlags = VK_IMAGE_COMPRESSION_FIXED_RATE_NONE_EXT;

      if (image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
         image_compression_props->imageCompressionFlags =
            radv_image_has_htile(image) ? VK_IMAGE_COMPRESSION_DEFAULT_EXT : VK_IMAGE_COMPRESSION_DISABLED_EXT;
      } else {
         image_compression_props->imageCompressionFlags =
            radv_image_has_dcc(image) ? VK_IMAGE_COMPRESSION_DEFAULT_EXT : VK_IMAGE_COMPRESSION_DISABLED_EXT;
      }
   }
}
1768 
1769 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetImageDrmFormatModifierPropertiesEXT(VkDevice _device,VkImage _image,VkImageDrmFormatModifierPropertiesEXT * pProperties)1770 radv_GetImageDrmFormatModifierPropertiesEXT(VkDevice _device, VkImage _image,
1771                                             VkImageDrmFormatModifierPropertiesEXT *pProperties)
1772 {
1773    RADV_FROM_HANDLE(radv_image, image, _image);
1774 
1775    pProperties->drmFormatModifier = image->planes[0].surface.modifier;
1776    return VK_SUCCESS;
1777 }
1778