1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include "util/u_atomic.h"
29 #include "util/u_debug.h"
30 #include "vulkan/util/vk_format.h"
31 #include "ac_drm_fourcc.h"
32 #include "radv_debug.h"
33 #include "radv_private.h"
34 #include "radv_radeon_winsys.h"
35 #include "sid.h"
36 #include "vk_format.h"
37 #include "vk_render_pass.h"
38 #include "vk_util.h"
39
40 #include "gfx10_format_table.h"
41
42 static unsigned
radv_choose_tiling(struct radv_device * device,const VkImageCreateInfo * pCreateInfo,VkFormat format)43 radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, VkFormat format)
44 {
45 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
46 assert(pCreateInfo->samples <= 1);
47 return RADEON_SURF_MODE_LINEAR_ALIGNED;
48 }
49
50 if (pCreateInfo->usage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR))
51 return RADEON_SURF_MODE_LINEAR_ALIGNED;
52
53 /* MSAA resources must be 2D tiled. */
54 if (pCreateInfo->samples > 1)
55 return RADEON_SURF_MODE_2D;
56
57 if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) &&
58 device->physical_device->rad_info.gfx_level <= GFX8) {
59 /* this causes hangs in some VK CTS tests on GFX9. */
60 /* Textures with a very small height are recommended to be linear. */
61 if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
62 /* Only very thin and long 2D textures should benefit from
63 * linear_aligned. */
64 (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
65 return RADEON_SURF_MODE_LINEAR_ALIGNED;
66 }
67
68 return RADEON_SURF_MODE_2D;
69 }
70
/* Whether TC-compatible HTILE can be enabled for this depth/stencil image,
 * i.e. whether shaders may sample the surface while HTILE stays compressed.
 * Returns false for any hardware generation/format/usage combination where
 * TC-compat HTILE would be broken or useless.
 */
static bool
radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, VkFormat format)
{
   /* TC-compat HTILE is only available for GFX8+. */
   if (device->physical_device->rad_info.gfx_level < GFX8)
      return false;

   /* TC-compat HTILE looks broken on Tonga (and Iceland is the same design) and the documented bug
    * workarounds don't help.
    */
   if (device->physical_device->rad_info.family == CHIP_TONGA ||
       device->physical_device->rad_info.family == CHIP_ICELAND)
      return false;

   /* Linear surfaces don't get HTILE, so TC-compat is moot. */
   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* Do not enable TC-compatible HTILE if the image isn't readable by a
    * shader because no texture fetches will happen.
    */
   if (!(pCreateInfo->usage &
         (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
      return false;

   if (device->physical_device->rad_info.gfx_level < GFX9) {
      /* TC-compat HTILE for MSAA depth/stencil images is broken
       * on GFX8 because the tiling doesn't match.
       */
      if (pCreateInfo->samples >= 2 && format == VK_FORMAT_D32_SFLOAT_S8_UINT)
         return false;

      /* GFX9+ supports compression for both 32-bit and 16-bit depth
       * surfaces, while GFX8 only supports 32-bit natively. Though,
       * the driver allows TC-compat HTILE for 16-bit depth surfaces
       * with no Z planes compression.
       */
      if (format != VK_FORMAT_D32_SFLOAT_S8_UINT && format != VK_FORMAT_D32_SFLOAT && format != VK_FORMAT_D16_UNORM)
         return false;

      /* TC-compat HTILE for layered images can have interleaved slices (see sliceInterleaved flag
       * in addrlib). radv_clear_htile does not work.
       */
      if (pCreateInfo->arrayLayers > 1)
         return false;
   }

   /* GFX9 has issues when the sample count is 4 and the format is D16 */
   if (device->physical_device->rad_info.gfx_level == GFX9 && pCreateInfo->samples == 4 &&
       format == VK_FORMAT_D16_UNORM)
      return false;

   return true;
}
124
125 static bool
radv_surface_has_scanout(struct radv_device * device,const struct radv_image_create_info * info)126 radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
127 {
128 if (info->bo_metadata) {
129 if (device->physical_device->rad_info.gfx_level >= GFX9)
130 return info->bo_metadata->u.gfx9.scanout;
131 else
132 return info->bo_metadata->u.legacy.scanout;
133 }
134
135 return info->scanout;
136 }
137
138 static bool
radv_image_use_fast_clear_for_image_early(const struct radv_device * device,const struct radv_image * image)139 radv_image_use_fast_clear_for_image_early(const struct radv_device *device, const struct radv_image *image)
140 {
141 if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
142 return true;
143
144 if (image->vk.samples <= 1 && image->vk.extent.width * image->vk.extent.height <= 512 * 512) {
145 /* Do not enable CMASK or DCC for small surfaces where the cost
146 * of the eliminate pass can be higher than the benefit of fast
147 * clear. RadeonSI does this, but the image threshold is
148 * different.
149 */
150 return false;
151 }
152
153 return !!(image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
154 }
155
156 static bool
radv_image_use_fast_clear_for_image(const struct radv_device * device,const struct radv_image * image)157 radv_image_use_fast_clear_for_image(const struct radv_device *device, const struct radv_image *image)
158 {
159 if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
160 return true;
161
162 return radv_image_use_fast_clear_for_image_early(device, image) && (image->exclusive ||
163 /* Enable DCC for concurrent images if stores are
164 * supported because that means we can keep DCC
165 * compressed on all layouts/queues.
166 */
167 radv_image_use_dcc_image_stores(device, image));
168 }
169
170 bool
radv_are_formats_dcc_compatible(const struct radv_physical_device * pdev,const void * pNext,VkFormat format,VkImageCreateFlags flags,bool * sign_reinterpret)171 radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext, VkFormat format,
172 VkImageCreateFlags flags, bool *sign_reinterpret)
173 {
174 bool blendable;
175
176 if (!radv_is_colorbuffer_format_supported(pdev, format, &blendable))
177 return false;
178
179 if (sign_reinterpret != NULL)
180 *sign_reinterpret = false;
181
182 /* All formats are compatible on GFX11. */
183 if ((flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) && pdev->rad_info.gfx_level < GFX11) {
184 const struct VkImageFormatListCreateInfo *format_list =
185 (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(pNext, IMAGE_FORMAT_LIST_CREATE_INFO);
186
187 /* We have to ignore the existence of the list if viewFormatCount = 0 */
188 if (format_list && format_list->viewFormatCount) {
189 /* compatibility is transitive, so we only need to check
190 * one format with everything else. */
191 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
192 if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
193 continue;
194
195 if (!radv_dcc_formats_compatible(pdev->rad_info.gfx_level, format, format_list->pViewFormats[i],
196 sign_reinterpret))
197 return false;
198 }
199 } else {
200 return false;
201 }
202 }
203
204 return true;
205 }
206
207 static bool
radv_format_is_atomic_allowed(struct radv_device * device,VkFormat format)208 radv_format_is_atomic_allowed(struct radv_device *device, VkFormat format)
209 {
210 if (format == VK_FORMAT_R32_SFLOAT && !radv_uses_image_float32_atomics(device))
211 return false;
212
213 return radv_is_atomic_format_supported(format);
214 }
215
216 static bool
radv_formats_is_atomic_allowed(struct radv_device * device,const void * pNext,VkFormat format,VkImageCreateFlags flags)217 radv_formats_is_atomic_allowed(struct radv_device *device, const void *pNext, VkFormat format, VkImageCreateFlags flags)
218 {
219 if (radv_format_is_atomic_allowed(device, format))
220 return true;
221
222 if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
223 const struct VkImageFormatListCreateInfo *format_list =
224 (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(pNext, IMAGE_FORMAT_LIST_CREATE_INFO);
225
226 /* We have to ignore the existence of the list if viewFormatCount = 0 */
227 if (format_list && format_list->viewFormatCount) {
228 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
229 if (radv_format_is_atomic_allowed(device, format_list->pViewFormats[i]))
230 return true;
231 }
232 }
233 }
234
235 return false;
236 }
237
/* Early (pre-surface-computation) decision on whether DCC may be enabled for
 * this image; radv_use_dcc_for_image_late() can still disable it afterwards.
 * *sign_reinterpret is passed through to radv_are_formats_dcc_compatible().
 */
static bool
radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *image, const VkImageCreateInfo *pCreateInfo,
                             VkFormat format, bool *sign_reinterpret)
{
   /* DCC (Delta Color Compression) is only available for GFX8+. */
   if (device->physical_device->rad_info.gfx_level < GFX8)
      return false;

   const VkImageCompressionControlEXT *compression =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_COMPRESSION_CONTROL_EXT);

   /* Honor the nodcc debug flag and the app's explicit compression opt-out. */
   if (device->instance->debug_flags & RADV_DEBUG_NO_DCC ||
       (compression && compression->flags == VK_IMAGE_COMPRESSION_DISABLED_EXT)) {
      return false;
   }

   /* Shareable images need a DRM format modifier to communicate the DCC layout. */
   if (image->shareable && image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return false;

   /*
    * TODO: Enable DCC for storage images on GFX9 and earlier.
    *
    * Also disable DCC with atomics because even when DCC stores are
    * supported atomics will always decompress. So if we are
    * decompressing a lot anyway we might as well not have DCC.
    */
   if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
       (device->physical_device->rad_info.gfx_level < GFX10 ||
        radv_formats_is_atomic_allowed(device, pCreateInfo->pNext, format, pCreateInfo->flags)))
      return false;

   /* DCC requires a tiled layout. */
   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* No DCC for subsampled or multi-plane (e.g. YCbCr) formats. */
   if (vk_format_is_subsampled(format) || vk_format_get_plane_count(format) > 1)
      return false;

   /* Without fast clears DCC isn't worthwhile, except when a modifier
    * explicitly requests it. */
   if (!radv_image_use_fast_clear_for_image_early(device, image) &&
       image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return false;

   /* Do not enable DCC for mipmapped arrays because performance is worse. */
   if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
      return false;

   if (device->physical_device->rad_info.gfx_level < GFX10) {
      /* TODO: Add support for DCC MSAA on GFX8-9. */
      if (pCreateInfo->samples > 1 && !device->physical_device->dcc_msaa_allowed)
         return false;

      /* TODO: Add support for DCC layers/mipmaps on GFX9. */
      if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
          device->physical_device->rad_info.gfx_level == GFX9)
         return false;
   }

   /* DCC MSAA can't work on GFX10.3 and earlier without FMASK. */
   if (pCreateInfo->samples > 1 && device->physical_device->rad_info.gfx_level < GFX11 &&
       (device->instance->debug_flags & RADV_DEBUG_NO_FMASK))
      return false;

   /* Finally, all possible view formats must share one DCC encoding. */
   return radv_are_formats_dcc_compatible(device->physical_device, pCreateInfo->pNext, format, pCreateInfo->flags,
                                          sign_reinterpret);
}
302
303 static bool
radv_use_dcc_for_image_late(struct radv_device * device,struct radv_image * image)304 radv_use_dcc_for_image_late(struct radv_device *device, struct radv_image *image)
305 {
306 if (!radv_image_has_dcc(image))
307 return false;
308
309 if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
310 return true;
311
312 if (!radv_image_use_fast_clear_for_image(device, image))
313 return false;
314
315 /* TODO: Fix storage images with DCC without DCC image stores.
316 * Disabling it for now. */
317 if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) && !radv_image_use_dcc_image_stores(device, image))
318 return false;
319
320 return true;
321 }
322
323 /*
324 * Whether to enable image stores with DCC compression for this image. If
325 * this function returns false the image subresource should be decompressed
326 * before using it with image stores.
327 *
328 * Note that this can have mixed performance implications, see
329 * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6796#note_643299
330 *
331 * This function assumes the image uses DCC compression.
332 */
bool
radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image)
{
   /* Delegates to ac_surface; the answer depends only on the gfx level and
    * the computed DCC layout of plane 0's surface. */
   return ac_surface_supports_dcc_image_stores(device->physical_device->rad_info.gfx_level, &image->planes[0].surface);
}
338
339 /*
340 * Whether to use a predicate to determine whether DCC is in a compressed
341 * state. This can be used to avoid decompressing an image multiple times.
342 */
343 bool
radv_image_use_dcc_predication(const struct radv_device * device,const struct radv_image * image)344 radv_image_use_dcc_predication(const struct radv_device *device, const struct radv_image *image)
345 {
346 return radv_image_has_dcc(image) && !radv_image_use_dcc_image_stores(device, image);
347 }
348
349 static inline bool
radv_use_fmask_for_image(const struct radv_device * device,const struct radv_image * image)350 radv_use_fmask_for_image(const struct radv_device *device, const struct radv_image *image)
351 {
352 if (device->physical_device->rad_info.gfx_level == GFX9 && image->vk.array_layers > 1) {
353 /* On GFX9, FMASK can be interleaved with layers and this isn't properly supported. */
354 return false;
355 }
356
357 return device->physical_device->use_fmask && image->vk.samples > 1 &&
358 ((image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
359 (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
360 }
361
/* Whether this depth/stencil image gets an HTILE metadata surface at all. */
static inline bool
radv_use_htile_for_image(const struct radv_device *device, const struct radv_image *image,
                         const VkImageCreateInfo *pCreateInfo)
{
   const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;

   const VkImageCompressionControlEXT *compression =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_COMPRESSION_CONTROL_EXT);

   /* Honor the nohiz debug flag and the app's explicit compression opt-out. */
   if (device->instance->debug_flags & RADV_DEBUG_NO_HIZ ||
       (compression && compression->flags == VK_IMAGE_COMPRESSION_DISABLED_EXT))
      return false;

   /* TODO:
    * - Investigate about mips+layers.
    * - Enable on other gens.
    */
   bool use_htile_for_mips = image->vk.array_layers == 1 && device->physical_device->rad_info.gfx_level >= GFX10;

   /* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */
   if (device->physical_device->rad_info.gfx_level == GFX10 && image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT &&
       image->vk.mip_levels > 1)
      return false;

   /* Do not enable HTILE for very small images because it seems less performant but make sure it's
    * allowed with VRS attachments because we need HTILE on GFX10.3.
    */
   if (image->vk.extent.width * image->vk.extent.height < 8 * 8 &&
       !(device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS) &&
       !(gfx_level == GFX10_3 && device->vk.enabled_features.attachmentFragmentShadingRate))
      return false;

   /* Shareable images never get HTILE; mipmapped images only where allowed above. */
   return (image->vk.mip_levels == 1 || use_htile_for_mips) && !image->shareable;
}
396
397 static bool
radv_use_tc_compat_cmask_for_image(struct radv_device * device,struct radv_image * image)398 radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image *image)
399 {
400 /* TC-compat CMASK is only available for GFX8+. */
401 if (device->physical_device->rad_info.gfx_level < GFX8)
402 return false;
403
404 /* GFX9 has issues when sample count is greater than 2 */
405 if (device->physical_device->rad_info.gfx_level == GFX9 && image->vk.samples > 2)
406 return false;
407
408 if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK)
409 return false;
410
411 /* TC-compat CMASK with storage images is supported on GFX10+. */
412 if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) && device->physical_device->rad_info.gfx_level < GFX10)
413 return false;
414
415 /* Do not enable TC-compatible if the image isn't readable by a shader
416 * because no texture fetches will happen.
417 */
418 if (!(image->vk.usage &
419 (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
420 return false;
421
422 /* If the image doesn't have FMASK, it can't be fetchable. */
423 if (!radv_image_has_fmask(image))
424 return false;
425
426 return true;
427 }
428
429 static uint32_t
radv_get_bo_metadata_word1(const struct radv_device * device)430 radv_get_bo_metadata_word1(const struct radv_device *device)
431 {
432 return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
433 }
434
435 static bool
radv_is_valid_opaque_metadata(const struct radv_device * device,const struct radeon_bo_metadata * md)436 radv_is_valid_opaque_metadata(const struct radv_device *device, const struct radeon_bo_metadata *md)
437 {
438 if (md->metadata[0] != 1 || md->metadata[1] != radv_get_bo_metadata_word1(device))
439 return false;
440
441 if (md->size_metadata < 40)
442 return false;
443
444 return true;
445 }
446
/* Overwrite the surface's tiling parameters with those from imported BO
 * metadata, so our layout matches what the exporter chose. */
static void
radv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf *surface,
                                 const struct radeon_bo_metadata *md)
{
   /* Drop the previously chosen tiling mode before re-deriving it below. */
   surface->flags = RADEON_SURF_CLR(surface->flags, MODE);

   if (device->physical_device->rad_info.gfx_level >= GFX9) {
      /* On GFX9+ a single swizzle mode describes the layout; mode 0 is linear. */
      if (md->u.gfx9.swizzle_mode > 0)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

      surface->u.gfx9.swizzle_mode = md->u.gfx9.swizzle_mode;
   } else {
      /* Legacy (pre-GFX9) tiling is described by several separate fields. */
      surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
      surface->u.legacy.bankw = md->u.legacy.bankw;
      surface->u.legacy.bankh = md->u.legacy.bankh;
      surface->u.legacy.tile_split = md->u.legacy.tile_split;
      surface->u.legacy.mtilea = md->u.legacy.mtilea;
      surface->u.legacy.num_banks = md->u.legacy.num_banks;

      /* Macro tiling wins over micro tiling; neither means linear. */
      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
   }
}
476
/* Reconcile the requested image dimensions with those of an imported BO.
 * Overrides image_info's width/height with the (possibly larger) external
 * dimensions when that is safe; returns VK_ERROR_INVALID_EXTERNAL_HANDLE
 * when the external image is smaller, or mismatched on GFX10+. */
static VkResult
radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image,
                            const struct radv_image_create_info *create_info, struct ac_surf_info *image_info)
{
   unsigned width = image->vk.extent.width;
   unsigned height = image->vk.extent.height;

   /*
    * minigbm sometimes allocates bigger images which is going to result in
    * weird strides and other properties. Lets be lenient where possible and
    * fail it on GFX10 (as we cannot cope there).
    *
    * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
    */
   if (create_info->bo_metadata && radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
      const struct radeon_bo_metadata *md = create_info->bo_metadata;

      /* Decode the external width/height out of the stored texture descriptor
       * words; the bit layout differs between GFX10+ and older gens. */
      if (device->physical_device->rad_info.gfx_level >= GFX10) {
         width = G_00A004_WIDTH_LO(md->metadata[3]) + (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
         height = G_00A008_HEIGHT(md->metadata[4]) + 1;
      } else {
         width = G_008F18_WIDTH(md->metadata[4]) + 1;
         height = G_008F18_HEIGHT(md->metadata[4]) + 1;
      }
   }

   /* Common case: external and internal dimensions agree. */
   if (image->vk.extent.width == width && image->vk.extent.height == height)
      return VK_SUCCESS;

   if (width < image->vk.extent.width || height < image->vk.extent.height) {
      fprintf(stderr,
              "The imported image has smaller dimensions than the internal\n"
              "dimensions. Using it is going to fail badly, so we reject\n"
              "this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->vk.extent.width, image->vk.extent.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
      fprintf(stderr,
              "Tried to import an image with inconsistent width on GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency in width and will fail this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->vk.extent.width, image->vk.extent.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else {
      fprintf(stderr,
              "Tried to import an image with inconsistent width on pre-GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency and would fail on GFX10.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->vk.extent.width, image->vk.extent.height, width, height);
   }
   /* Pre-GFX10: be lenient and adopt the larger external dimensions. */
   image_info->width = width;
   image_info->height = height;

   return VK_SUCCESS;
}
535
/* Apply import-time adjustments (BO metadata, scanout, PRIME blit source)
 * to every plane's surface before the surface layout is computed. */
static VkResult
radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *image,
                                 const struct radv_image_create_info *create_info, struct ac_surf_info *image_info)
{
   VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned plane = 0; plane < image->plane_count; ++plane) {
      if (create_info->bo_metadata) {
         /* Imported BO: take the tiling layout from its metadata. */
         radv_patch_surface_from_metadata(device, &image->planes[plane].surface, create_info->bo_metadata);
      }

      if (radv_surface_has_scanout(device, create_info)) {
         image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
         if (device->instance->debug_flags & RADV_DEBUG_NO_DISPLAY_DCC)
            image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;

         image_info->surf_index = NULL;
      }

      if (create_info->prime_blit_src && !device->physical_device->rad_info.sdma_supports_compression) {
         /* Older SDMA hw can't handle DCC */
         image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
      }
   }
   return VK_SUCCESS;
}
564
565 static VkFormat
radv_image_get_plane_format(const struct radv_physical_device * pdev,const struct radv_image * image,unsigned plane)566 radv_image_get_plane_format(const struct radv_physical_device *pdev, const struct radv_image *image, unsigned plane)
567 {
568 if (radv_is_format_emulated(pdev, image->vk.format)) {
569 if (plane == 0)
570 return image->vk.format;
571 if (vk_format_description(image->vk.format)->layout == UTIL_FORMAT_LAYOUT_ASTC)
572 return vk_texcompress_astc_emulation_format(image->vk.format);
573 else
574 return vk_texcompress_etc2_emulation_format(image->vk.format);
575 }
576
577 return vk_format_get_plane_format(image->vk.format, plane);
578 }
579
/* Compute the RADEON_SURF_* flags for one plane of a new image: tiling mode,
 * surface type, and which metadata surfaces (HTILE/DCC/FMASK) to allow. */
static uint64_t
radv_get_surface_flags(struct radv_device *device, struct radv_image *image, unsigned plane_id,
                       const VkImageCreateInfo *pCreateInfo, VkFormat image_format)
{
   uint64_t flags;
   unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
   VkFormat format = radv_image_get_plane_format(device->physical_device, image, plane_id);
   const struct util_format_description *desc = vk_format_description(format);
   bool is_depth, is_stencil;

   is_depth = util_format_has_depth(desc);
   is_stencil = util_format_has_stencil(desc);

   flags = RADEON_SURF_SET(array_mode, MODE);

   /* Map the Vulkan image type (plus layering) to a surface type. */
   switch (pCreateInfo->imageType) {
   case VK_IMAGE_TYPE_1D:
      if (pCreateInfo->arrayLayers > 1)
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
      else
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
      break;
   case VK_IMAGE_TYPE_2D:
      if (pCreateInfo->arrayLayers > 1)
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
      else
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
      break;
   case VK_IMAGE_TYPE_3D:
      flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
      break;
   default:
      unreachable("unhandled image type");
   }

   /* Required for clearing/initializing a specific layer on GFX8. */
   flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;

   if (is_depth) {
      flags |= RADEON_SURF_ZBUFFER;

      if (is_depth && is_stencil && device->physical_device->rad_info.gfx_level <= GFX8) {
         if (!(pCreateInfo->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT))
            flags |= RADEON_SURF_NO_RENDER_TARGET;

         /* RADV doesn't support stencil pitch adjustment. As a result there are some spec gaps that
          * are not covered by CTS.
          *
          * For D+S images with pitch constraints due to rendertarget usage it can happen that
          * sampling from mipmaps beyond the base level of the descriptor is broken as the pitch
          * adjustment can't be applied to anything beyond the first level.
          */
         flags |= RADEON_SURF_NO_STENCIL_ADJUST;
      }

      /* HTILE only when the image is renderable; TC-compat when shaders can
       * also sample it without a decompress. */
      if (radv_use_htile_for_image(device, image, pCreateInfo) && !(flags & RADEON_SURF_NO_RENDER_TARGET)) {
         if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
            flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
      } else {
         flags |= RADEON_SURF_NO_HTILE;
      }
   }

   if (is_stencil)
      flags |= RADEON_SURF_SBUFFER;

   /* 128-bit compressed 3D images can't be render targets on GFX9+. */
   if (device->physical_device->rad_info.gfx_level >= GFX9 && pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
       vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format))
      flags |= RADEON_SURF_NO_RENDER_TARGET;

   if (!radv_use_dcc_for_image_early(device, image, pCreateInfo, image_format, &image->dcc_sign_reinterpret))
      flags |= RADEON_SURF_DISABLE_DCC;

   if (!radv_use_fmask_for_image(device, image))
      flags |= RADEON_SURF_NO_FMASK;

   /* Sparse residency: no metadata surfaces at all. */
   if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
      flags |= RADEON_SURF_PRT | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE | RADEON_SURF_DISABLE_DCC;
   }

   /* Images used on the transfer queue need SDMA-compatible layouts. */
   if (image->queue_family_mask & BITFIELD_BIT(RADV_QUEUE_TRANSFER)) {
      if (!device->physical_device->rad_info.sdma_supports_compression)
         flags |= RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_HTILE;
   }

   /* Disable DCC for VRS rate images because the hw can't handle compression. */
   if (pCreateInfo->usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)
      flags |= RADEON_SURF_VRS_RATE | RADEON_SURF_DISABLE_DCC;
   if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT)))
      flags |= RADEON_SURF_NO_TEXTURE;

   return flags;
}
673
674 unsigned
radv_map_swizzle(unsigned swizzle)675 radv_map_swizzle(unsigned swizzle)
676 {
677 switch (swizzle) {
678 case PIPE_SWIZZLE_Y:
679 return V_008F0C_SQ_SEL_Y;
680 case PIPE_SWIZZLE_Z:
681 return V_008F0C_SQ_SEL_Z;
682 case PIPE_SWIZZLE_W:
683 return V_008F0C_SQ_SEL_W;
684 case PIPE_SWIZZLE_0:
685 return V_008F0C_SQ_SEL_0;
686 case PIPE_SWIZZLE_1:
687 return V_008F0C_SQ_SEL_1;
688 default: /* PIPE_SWIZZLE_X */
689 return V_008F0C_SQ_SEL_X;
690 }
691 }
692
693 void
radv_compose_swizzle(const struct util_format_description * desc,const VkComponentMapping * mapping,enum pipe_swizzle swizzle[4])694 radv_compose_swizzle(const struct util_format_description *desc, const VkComponentMapping *mapping,
695 enum pipe_swizzle swizzle[4])
696 {
697 if (desc->format == PIPE_FORMAT_R64_UINT || desc->format == PIPE_FORMAT_R64_SINT) {
698 /* 64-bit formats only support storage images and storage images
699 * require identity component mappings. We use 32-bit
700 * instructions to access 64-bit images, so we need a special
701 * case here.
702 *
703 * The zw components are 1,0 so that they can be easily be used
704 * by loads to create the w component, which has to be 0 for
705 * NULL descriptors.
706 */
707 swizzle[0] = PIPE_SWIZZLE_X;
708 swizzle[1] = PIPE_SWIZZLE_Y;
709 swizzle[2] = PIPE_SWIZZLE_1;
710 swizzle[3] = PIPE_SWIZZLE_0;
711 } else if (!mapping) {
712 for (unsigned i = 0; i < 4; i++)
713 swizzle[i] = desc->swizzle[i];
714 } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
715 const unsigned char swizzle_xxxx[4] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0, PIPE_SWIZZLE_1};
716 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
717 } else {
718 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
719 }
720 }
721
722 bool
vi_alpha_is_on_msb(const struct radv_device * device,const VkFormat format)723 vi_alpha_is_on_msb(const struct radv_device *device, const VkFormat format)
724 {
725 if (device->physical_device->rad_info.gfx_level >= GFX11)
726 return false;
727
728 const struct util_format_description *desc = vk_format_description(format);
729
730 if (device->physical_device->rad_info.gfx_level >= GFX10 && desc->nr_channels == 1)
731 return desc->swizzle[3] == PIPE_SWIZZLE_X;
732
733 return radv_translate_colorswap(format, false) <= 1;
734 }
735
/* Build the opaque (driver-specific) metadata words for one image plane:
 * a texture descriptor plus surface layout data, serialized via ac_surface
 * so other drivers/instances can reinterpret the surface on import. */
static void
radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image, unsigned plane_id,
                           struct radeon_bo_metadata *md)
{
   /* Identity component mapping (static => zero-initialized). */
   static const VkComponentMapping fixedmapping;
   const VkFormat plane_format = radv_image_get_plane_format(device->physical_device, image, plane_id);
   const unsigned plane_width = vk_format_get_plane_width(image->vk.format, plane_id, image->vk.extent.width);
   const unsigned plane_height = vk_format_get_plane_height(image->vk.format, plane_id, image->vk.extent.height);
   struct radeon_surf *surface = &image->planes[plane_id].surface;
   /* Legacy (GFX8 and older) descriptors need per-level info; newer gens don't. */
   const struct legacy_surf_level *base_level_info =
      device->physical_device->rad_info.gfx_level <= GFX8 ? &surface->u.legacy.level[0] : NULL;
   uint32_t desc[8];

   /* Descriptor covering all mips and layers of this plane. */
   radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->vk.image_type, plane_format,
                                &fixedmapping, 0, image->vk.mip_levels - 1, 0, image->vk.array_layers - 1, plane_width,
                                plane_height, image->vk.extent.depth, 0.0f, desc, NULL, 0, NULL, NULL);

   radv_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, 0, 0, surface->blk_w, false, false, false,
                                    false, desc, NULL);

   /* Serialize the descriptor + surface layout into md->metadata. */
   ac_surface_compute_umd_metadata(&device->physical_device->rad_info, surface, image->vk.mip_levels, desc,
                                   &md->size_metadata, md->metadata,
                                   device->instance->debug_flags & RADV_DEBUG_EXTRA_MD);
}
760
761 void
radv_init_metadata(struct radv_device * device,struct radv_image * image,struct radeon_bo_metadata * metadata)762 radv_init_metadata(struct radv_device *device, struct radv_image *image, struct radeon_bo_metadata *metadata)
763 {
764 /* use plane 0, even when there are multiple planes, to follow radeonsi */
765 const unsigned plane_id = 0;
766 struct radeon_surf *surface = &image->planes[plane_id].surface;
767
768 memset(metadata, 0, sizeof(*metadata));
769
770 if (device->physical_device->rad_info.gfx_level >= GFX9) {
771 uint64_t dcc_offset =
772 image->bindings[0].offset + (surface->display_dcc_offset ? surface->display_dcc_offset : surface->meta_offset);
773 metadata->u.gfx9.swizzle_mode = surface->u.gfx9.swizzle_mode;
774 metadata->u.gfx9.dcc_offset_256b = dcc_offset >> 8;
775 metadata->u.gfx9.dcc_pitch_max = surface->u.gfx9.color.display_dcc_pitch_max;
776 metadata->u.gfx9.dcc_independent_64b_blocks = surface->u.gfx9.color.dcc.independent_64B_blocks;
777 metadata->u.gfx9.dcc_independent_128b_blocks = surface->u.gfx9.color.dcc.independent_128B_blocks;
778 metadata->u.gfx9.dcc_max_compressed_block_size = surface->u.gfx9.color.dcc.max_compressed_block_size;
779 metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
780 } else {
781 metadata->u.legacy.microtile =
782 surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ? RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
783 metadata->u.legacy.macrotile =
784 surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ? RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
785 metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
786 metadata->u.legacy.bankw = surface->u.legacy.bankw;
787 metadata->u.legacy.bankh = surface->u.legacy.bankh;
788 metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
789 metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
790 metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
791 metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
792 metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
793 }
794 radv_query_opaque_metadata(device, image, plane_id, metadata);
795 }
796
797 void
radv_image_override_offset_stride(struct radv_device * device,struct radv_image * image,uint64_t offset,uint32_t stride)798 radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image, uint64_t offset,
799 uint32_t stride)
800 {
801 ac_surface_override_offset_stride(&device->physical_device->rad_info, &image->planes[0].surface,
802 image->vk.array_layers, image->vk.mip_levels, offset, stride);
803 }
804
805 static void
radv_image_alloc_single_sample_cmask(const struct radv_device * device,const struct radv_image * image,struct radeon_surf * surf)806 radv_image_alloc_single_sample_cmask(const struct radv_device *device, const struct radv_image *image,
807 struct radeon_surf *surf)
808 {
809 if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 || image->vk.mip_levels > 1 ||
810 image->vk.extent.depth > 1 || radv_image_has_dcc(image) || !radv_image_use_fast_clear_for_image(device, image) ||
811 (image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT))
812 return;
813
814 assert(image->vk.samples == 1);
815
816 surf->cmask_offset = align64(surf->total_size, 1ull << surf->cmask_alignment_log2);
817 surf->total_size = surf->cmask_offset + surf->cmask_size;
818 surf->alignment_log2 = MAX2(surf->alignment_log2, surf->cmask_alignment_log2);
819 }
820
821 static void
radv_image_alloc_values(const struct radv_device * device,struct radv_image * image)822 radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
823 {
824 /* images with modifiers can be potentially imported */
825 if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
826 return;
827
828 if (radv_image_has_cmask(image) || (radv_image_has_dcc(image) && !image->support_comp_to_single)) {
829 image->fce_pred_offset = image->size;
830 image->size += 8 * image->vk.mip_levels;
831 }
832
833 if (radv_image_use_dcc_predication(device, image)) {
834 image->dcc_pred_offset = image->size;
835 image->size += 8 * image->vk.mip_levels;
836 }
837
838 if ((radv_image_has_dcc(image) && !image->support_comp_to_single) || radv_image_has_cmask(image) ||
839 radv_image_has_htile(image)) {
840 image->clear_value_offset = image->size;
841 image->size += 8 * image->vk.mip_levels;
842 }
843
844 if (radv_image_is_tc_compat_htile(image) && device->physical_device->rad_info.has_tc_compat_zrange_bug) {
845 /* Metadata for the TC-compatible HTILE hardware bug which
846 * have to be fixed by updating ZRANGE_PRECISION when doing
847 * fast depth clears to 0.0f.
848 */
849 image->tc_compat_zrange_offset = image->size;
850 image->size += image->vk.mip_levels * 4;
851 }
852 }
853
/* Determine if the image is affected by the pipe misaligned metadata issue
 * which requires to invalidate L2.
 */
static bool
radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
{
   const struct radeon_info *rad_info = &device->physical_device->rad_info;
   int log2_samples = util_logbase2(image->vk.samples);

   /* Only called for GFX10+ (see the L2 coherency check). */
   assert(rad_info->gfx_level >= GFX10);

   /* If any plane is misaligned, the whole image counts as misaligned. */
   for (unsigned i = 0; i < image->plane_count; ++i) {
      VkFormat fmt = radv_image_get_plane_format(device->physical_device, image, i);
      int log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
      int log2_bpp_and_samples;

      if (rad_info->gfx_level >= GFX10_3) {
         log2_bpp_and_samples = log2_bpp + log2_samples;
      } else {
         /* NOTE(review): pre-GFX10.3 treats layered depth (>= 8 layers) as
          * 4 bpp for this computation — presumably mirroring an addrlib/HW
          * addressing rule; confirm against the radeonsi equivalent.
          */
         if (vk_format_has_depth(image->vk.format) && image->vk.array_layers >= 8) {
            log2_bpp = 2;
         }

         log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
      }

      /* Overlap of the metadata addressing with the pipe bits, derived from
       * GB_ADDR_CONFIG.
       */
      int num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config);
      int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);

      if (vk_format_has_depth(image->vk.format)) {
         /* Depth: only TC-compatible HTILE is read through the texture path. */
         if (radv_image_is_tc_compat_htile(image) && overlap) {
            return true;
         }
      } else {
         int max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config);
         int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
         int samples_overlap = MIN2(log2_samples, overlap);

         /* TODO: It shouldn't be necessary if the image has DCC but
          * not readable by shader.
          */
         if ((radv_image_has_dcc(image) || radv_image_is_tc_compat_cmask(image)) &&
             (samples_overlap > log2_samples_frag_diff)) {
            return true;
         }
      }
   }

   return false;
}
904
905 static bool
radv_image_is_l2_coherent(const struct radv_device * device,const struct radv_image * image)906 radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
907 {
908 if (device->physical_device->rad_info.gfx_level >= GFX10) {
909 return !device->physical_device->rad_info.tcc_rb_non_coherent && !radv_image_is_pipe_misaligned(device, image);
910 } else if (device->physical_device->rad_info.gfx_level == GFX9) {
911 if (image->vk.samples == 1 &&
912 (image->vk.usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
913 !vk_format_has_stencil(image->vk.format)) {
914 /* Single-sample color and single-sample depth
915 * (not stencil) are coherent with shaders on
916 * GFX9.
917 */
918 return true;
919 }
920 }
921
922 return false;
923 }
924
925 /**
926 * Determine if the given image can be fast cleared.
927 */
928 bool
radv_image_can_fast_clear(const struct radv_device * device,const struct radv_image * image)929 radv_image_can_fast_clear(const struct radv_device *device, const struct radv_image *image)
930 {
931 if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
932 return false;
933
934 if (vk_format_is_color(image->vk.format)) {
935 if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
936 return false;
937
938 /* RB+ doesn't work with CMASK fast clear on Stoney. */
939 if (!radv_image_has_dcc(image) && device->physical_device->rad_info.family == CHIP_STONEY)
940 return false;
941
942 /* Fast-clears with CMASK aren't supported for 128-bit formats. */
943 if (radv_image_has_cmask(image) && vk_format_get_blocksizebits(image->vk.format) > 64)
944 return false;
945 } else {
946 if (!radv_image_has_htile(image))
947 return false;
948 }
949
950 /* Do not fast clears 3D images. */
951 if (image->vk.image_type == VK_IMAGE_TYPE_3D)
952 return false;
953
954 return true;
955 }
956
957 /**
958 * Determine if the given image can be fast cleared using comp-to-single.
959 */
960 static bool
radv_image_use_comp_to_single(const struct radv_device * device,const struct radv_image * image)961 radv_image_use_comp_to_single(const struct radv_device *device, const struct radv_image *image)
962 {
963 /* comp-to-single is only available for GFX10+. */
964 if (device->physical_device->rad_info.gfx_level < GFX10)
965 return false;
966
967 /* If the image can't be fast cleared, comp-to-single can't be used. */
968 if (!radv_image_can_fast_clear(device, image))
969 return false;
970
971 /* If the image doesn't have DCC, it can't be fast cleared using comp-to-single */
972 if (!radv_image_has_dcc(image))
973 return false;
974
975 /* It seems 8bpp and 16bpp require RB+ to work. */
976 unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk.format);
977 if (bytes_per_pixel <= 2 && !device->physical_device->rad_info.rbplus_allowed)
978 return false;
979
980 return true;
981 }
982
983 static unsigned
radv_get_internal_plane_count(const struct radv_physical_device * pdev,VkFormat fmt)984 radv_get_internal_plane_count(const struct radv_physical_device *pdev, VkFormat fmt)
985 {
986 if (radv_is_format_emulated(pdev, fmt))
987 return 2;
988 return vk_format_get_plane_count(fmt);
989 }
990
991 static void
radv_image_reset_layout(const struct radv_physical_device * pdev,struct radv_image * image)992 radv_image_reset_layout(const struct radv_physical_device *pdev, struct radv_image *image)
993 {
994 image->size = 0;
995 image->alignment = 1;
996
997 image->tc_compatible_cmask = 0;
998 image->fce_pred_offset = image->dcc_pred_offset = 0;
999 image->clear_value_offset = image->tc_compat_zrange_offset = 0;
1000
1001 unsigned plane_count = radv_get_internal_plane_count(pdev, image->vk.format);
1002 for (unsigned i = 0; i < plane_count; ++i) {
1003 VkFormat format = radv_image_get_plane_format(pdev, image, i);
1004 if (vk_format_has_depth(format))
1005 format = vk_format_depth_only(format);
1006
1007 uint64_t flags = image->planes[i].surface.flags;
1008 uint64_t modifier = image->planes[i].surface.modifier;
1009 memset(image->planes + i, 0, sizeof(image->planes[i]));
1010
1011 image->planes[i].surface.flags = flags;
1012 image->planes[i].surface.modifier = modifier;
1013 image->planes[i].surface.blk_w = vk_format_get_blockwidth(format);
1014 image->planes[i].surface.blk_h = vk_format_get_blockheight(format);
1015 image->planes[i].surface.bpe = vk_format_get_blocksize(format);
1016
1017 /* align byte per element on dword */
1018 if (image->planes[i].surface.bpe == 3) {
1019 image->planes[i].surface.bpe = 4;
1020 }
1021 }
1022 }
1023
1024 struct ac_surf_info
radv_get_ac_surf_info(struct radv_device * device,const struct radv_image * image)1025 radv_get_ac_surf_info(struct radv_device *device, const struct radv_image *image)
1026 {
1027 struct ac_surf_info info;
1028
1029 memset(&info, 0, sizeof(info));
1030
1031 info.width = image->vk.extent.width;
1032 info.height = image->vk.extent.height;
1033 info.depth = image->vk.extent.depth;
1034 info.samples = image->vk.samples;
1035 info.storage_samples = image->vk.samples;
1036 info.array_size = image->vk.array_layers;
1037 info.levels = image->vk.mip_levels;
1038 info.num_channels = vk_format_get_nr_components(image->vk.format);
1039
1040 if (!vk_format_is_depth_or_stencil(image->vk.format) && !image->shareable &&
1041 !(image->vk.create_flags & (VK_IMAGE_CREATE_SPARSE_ALIASED_BIT | VK_IMAGE_CREATE_ALIAS_BIT)) &&
1042 image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
1043 info.surf_index = &device->image_mrt_offset_counter;
1044 }
1045
1046 return info;
1047 }
1048
/* Compute the full layout of the image: per-plane surfaces, offsets/strides,
 * total size/alignment, and driver metadata. For imports, validates the BO
 * metadata or the explicit modifier plane layouts and returns a
 * VK_ERROR_INVALID_* code on mismatch.
 */
VkResult
radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
                         const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
                         const struct VkVideoProfileListInfoKHR *profile_list, struct radv_image *image)
{
   /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
    * common internal case. */
   create_info.vk_info = NULL;

   struct ac_surf_info image_info = radv_get_ac_surf_info(device, image);
   VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
   if (result != VK_SUCCESS)
      return result;

   assert(!mod_info || mod_info->drmFormatModifierPlaneCount >= image->plane_count);

   radv_image_reset_layout(device->physical_device, image);

   /*
    * Due to how the decoder works, the user can't supply an oversized image, because if it attempts
    * to sample it later with a linear filter, it will get garbage after the height it wants,
    * so we let the user specify the width/height unaligned, and align them preallocation.
    */
   if (image->vk.usage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR)) {
      assert(profile_list);
      uint32_t width_align, height_align;
      radv_video_get_profile_alignments(device->physical_device, profile_list, &width_align, &height_align);
      image_info.width = align(image_info.width, width_align);
      image_info.height = align(image_info.height, height_align);

      if (radv_has_uvd(device->physical_device) && image->vk.usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) {
         /* UVD and kernel demand a full DPB allocation. */
         image_info.array_size = MIN2(16, image_info.array_size);
      }
   }

   unsigned plane_count = radv_get_internal_plane_count(device->physical_device, image->vk.format);
   for (unsigned plane = 0; plane < plane_count; ++plane) {
      struct ac_surf_info info = image_info;
      uint64_t offset;
      unsigned stride;

      /* Per-plane dimensions (planes of multi-planar formats may differ). */
      info.width = vk_format_get_plane_width(image->vk.format, plane, info.width);
      info.height = vk_format_get_plane_height(image->vk.format, plane, info.height);

      /* Disable compression metadata for multi-planar images or on request. */
      if (create_info.no_metadata_planes || plane_count > 1) {
         image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE;
      }

      device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);

      if (plane == 0) {
         if (!radv_use_dcc_for_image_late(device, image))
            ac_surface_zero_dcc_fields(&image->planes[0].surface);
      }

      /* Apply metadata from an imported BO (non-modifier import path). */
      if (create_info.bo_metadata && !mod_info &&
          !ac_surface_apply_umd_metadata(&device->physical_device->rad_info, &image->planes[plane].surface,
                                         image->vk.samples, image->vk.mip_levels,
                                         create_info.bo_metadata->size_metadata, create_info.bo_metadata->metadata))
         return VK_ERROR_INVALID_EXTERNAL_HANDLE;

      if (!create_info.no_metadata_planes && !create_info.bo_metadata && plane_count == 1 && !mod_info)
         radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface);

      if (mod_info) {
         /* Explicit modifier layout: validate and take the app-provided values. */
         if (mod_info->pPlaneLayouts[plane].rowPitch % image->planes[plane].surface.bpe ||
             !mod_info->pPlaneLayouts[plane].rowPitch)
            return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

         offset = mod_info->pPlaneLayouts[plane].offset;
         stride = mod_info->pPlaneLayouts[plane].rowPitch / image->planes[plane].surface.bpe;
      } else {
         /* Disjoint planes each start at 0 in their own allocation. */
         offset = image->disjoint ? 0 : align64(image->size, 1ull << image->planes[plane].surface.alignment_log2);
         stride = 0; /* 0 means no override */
      }

      if (!ac_surface_override_offset_stride(&device->physical_device->rad_info, &image->planes[plane].surface,
                                             image->vk.array_layers, image->vk.mip_levels, offset, stride))
         return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

      /* Validate DCC offsets in modifier layout. */
      if (plane_count == 1 && mod_info) {
         unsigned mem_planes = ac_surface_get_nplanes(&image->planes[plane].surface);
         if (mod_info->drmFormatModifierPlaneCount != mem_planes)
            return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

         for (unsigned i = 1; i < mem_planes; ++i) {
            if (ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level, &image->planes[plane].surface,
                                            i, 0) != mod_info->pPlaneLayouts[i].offset)
               return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
         }
      }

      image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size);
      image->alignment = MAX2(image->alignment, 1 << image->planes[plane].surface.alignment_log2);

      image->planes[plane].format = radv_image_get_plane_format(device->physical_device, image, plane);
   }

   image->tc_compatible_cmask = radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image);

   image->l2_coherent = radv_image_is_l2_coherent(device, image);

   image->support_comp_to_single = radv_image_use_comp_to_single(device, image);

   /* Reserve space for clear values and decompression predicates. */
   radv_image_alloc_values(device, image);

   assert(image->planes[0].surface.surf_size);
   assert(image->planes[0].surface.modifier == DRM_FORMAT_MOD_INVALID ||
          ac_modifier_has_dcc(image->planes[0].surface.modifier) == radv_image_has_dcc(image));
   return VK_SUCCESS;
}
1162
1163 static void
radv_destroy_image(struct radv_device * device,const VkAllocationCallbacks * pAllocator,struct radv_image * image)1164 radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAllocator, struct radv_image *image)
1165 {
1166 if ((image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bindings[0].bo) {
1167 radv_rmv_log_bo_destroy(device, image->bindings[0].bo);
1168 device->ws->buffer_destroy(device->ws, image->bindings[0].bo);
1169 }
1170
1171 if (image->owned_memory != VK_NULL_HANDLE) {
1172 RADV_FROM_HANDLE(radv_device_memory, mem, image->owned_memory);
1173 radv_free_memory(device, pAllocator, mem);
1174 }
1175
1176 radv_rmv_log_resource_destroy(device, (uint64_t)radv_image_to_handle(image));
1177 vk_image_finish(&image->vk);
1178 vk_free2(&device->vk.alloc, pAllocator, image);
1179 }
1180
/* Dump the image dimensions and per-plane surface layout to stderr
 * (enabled via RADV_DEBUG=img).
 */
static void
radv_image_print_info(struct radv_device *device, struct radv_image *image)
{
   fprintf(stderr, "Image:\n");
   fprintf(stderr,
           " Info: size=%" PRIu64 ", alignment=%" PRIu32 ", "
           "width=%" PRIu32 ", height=%" PRIu32 ", depth=%" PRIu32 ", "
           "array_size=%" PRIu32 ", levels=%" PRIu32 "\n",
           image->size, image->alignment, image->vk.extent.width, image->vk.extent.height, image->vk.extent.depth,
           image->vk.array_layers, image->vk.mip_levels);
   for (unsigned i = 0; i < image->plane_count; ++i) {
      const struct radv_image_plane *plane = &image->planes[i];
      const struct radeon_surf *surf = &plane->surface;
      const struct util_format_description *desc = vk_format_description(plane->format);
      /* Offset of the plane's first memory plane within the image. */
      uint64_t offset = ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level, &plane->surface, 0, 0);

      fprintf(stderr, " Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset);

      ac_surface_print_info(stderr, &device->physical_device->rad_info, surf);
   }
}
1202
1203 static uint64_t
radv_select_modifier(const struct radv_device * dev,VkFormat format,const struct VkImageDrmFormatModifierListCreateInfoEXT * mod_list)1204 radv_select_modifier(const struct radv_device *dev, VkFormat format,
1205 const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list)
1206 {
1207 const struct radv_physical_device *pdev = dev->physical_device;
1208 unsigned mod_count;
1209
1210 assert(mod_list->drmFormatModifierCount);
1211
1212 /* We can allow everything here as it does not affect order and the application
1213 * is only allowed to specify modifiers that we support. */
1214 const struct ac_modifier_options modifier_options = {
1215 .dcc = true,
1216 .dcc_retile = true,
1217 };
1218
1219 ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format), &mod_count, NULL);
1220
1221 uint64_t *mods = calloc(mod_count, sizeof(*mods));
1222
1223 /* If allocations fail, fall back to a dumber solution. */
1224 if (!mods)
1225 return mod_list->pDrmFormatModifiers[0];
1226
1227 ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format), &mod_count, mods);
1228
1229 for (unsigned i = 0; i < mod_count; ++i) {
1230 for (uint32_t j = 0; j < mod_list->drmFormatModifierCount; ++j) {
1231 if (mods[i] == mod_list->pDrmFormatModifiers[j]) {
1232 free(mods);
1233 return mod_list->pDrmFormatModifiers[j];
1234 }
1235 }
1236 }
1237 unreachable("App specified an invalid modifier");
1238 }
1239
/* Create a radv_image (also used for internal images when is_internal is set):
 * allocates the object with trailing per-plane storage, selects a DRM modifier
 * if requested, computes the layout, and creates the backing virtual BO for
 * sparse images.
 */
VkResult
radv_image_create(VkDevice _device, const struct radv_image_create_info *create_info,
                  const VkAllocationCallbacks *alloc, VkImage *pImage, bool is_internal)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
   uint64_t modifier = DRM_FORMAT_MOD_INVALID;
   struct radv_image *image = NULL;
   VkFormat format = radv_select_android_external_format(pCreateInfo->pNext, pCreateInfo->format);
   const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
   const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT);
   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
   const struct VkVideoProfileListInfoKHR *profile_list =
      vk_find_struct_const(pCreateInfo->pNext, VIDEO_PROFILE_LIST_INFO_KHR);

   /* Internal plane count may exceed the Vulkan plane count (emulated formats),
    * so size the allocation with it.
    */
   unsigned plane_count = radv_get_internal_plane_count(device->physical_device, format);

   const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;

   image = vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!image)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_image_init(&device->vk, &image->vk, pCreateInfo);

   /* Note: the Vulkan-visible plane count, not the internal one above. */
   image->plane_count = vk_format_get_plane_count(format);
   image->disjoint = image->plane_count > 1 && pCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT;

   image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
   if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
      /* External/foreign queue families imply access from any queue. */
      for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
         if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
             pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
            image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
         else
            image->queue_family_mask |=
               1u << vk_queue_to_radv(device->physical_device, pCreateInfo->pQueueFamilyIndices[i]);

      /* This queue never really accesses the image. */
      image->queue_family_mask &= ~(1u << RADV_QUEUE_SPARSE);
   }

   const VkExternalMemoryImageCreateInfo *external_info =
      vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO);

   /* Non-NULL pNext entry => the image may be exported/imported. */
   image->shareable = external_info;

   if (mod_list)
      modifier = radv_select_modifier(device, format, mod_list);
   else if (explicit_mod)
      modifier = explicit_mod->drmFormatModifier;

   for (unsigned plane = 0; plane < plane_count; ++plane) {
      image->planes[plane].surface.flags = radv_get_surface_flags(device, image, plane, pCreateInfo, format);
      image->planes[plane].surface.modifier = modifier;
   }

   if (image->vk.external_handle_types & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID) {
#if DETECT_OS_ANDROID
      image->vk.ahb_format = radv_ahb_format_for_vk_format(image->vk.format);
#endif

      /* Layout computation is skipped here for AHB-backed images
       * (presumably done once the actual buffer is known — see the bind path).
       */
      *pImage = radv_image_to_handle(image);
      assert(!(image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
      return VK_SUCCESS;
   }

   VkResult result = radv_image_create_layout(device, *create_info, explicit_mod, profile_list, image);
   if (result != VK_SUCCESS) {
      radv_destroy_image(device, alloc, image);
      return result;
   }

   if (image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
      /* Sparse images get a virtual address range; pages are bound later. */
      image->alignment = MAX2(image->alignment, 4096);
      image->size = align64(image->size, image->alignment);
      image->bindings[0].offset = 0;

      result = device->ws->buffer_create(device->ws, image->size, image->alignment, 0, RADEON_FLAG_VIRTUAL,
                                         RADV_BO_PRIORITY_VIRTUAL, 0, &image->bindings[0].bo);
      if (result != VK_SUCCESS) {
         radv_destroy_image(device, alloc, image);
         return vk_error(device, result);
      }
      radv_rmv_log_bo_allocate(device, image->bindings[0].bo, image->size, true);
   }

   if (device->instance->debug_flags & RADV_DEBUG_IMG) {
      radv_image_print_info(device, image);
   }

   *pImage = radv_image_to_handle(image);

   radv_rmv_log_image_create(device, pCreateInfo, is_internal, *pImage);
   if (image->bindings[0].bo)
      radv_rmv_log_image_bind(device, *pImage);
   return VK_SUCCESS;
}
1340
1341 unsigned
radv_plane_from_aspect(VkImageAspectFlags mask)1342 radv_plane_from_aspect(VkImageAspectFlags mask)
1343 {
1344 switch (mask) {
1345 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1346 case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
1347 return 1;
1348 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1349 case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
1350 return 2;
1351 case VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT:
1352 return 3;
1353 default:
1354 return 0;
1355 }
1356 }
1357
1358 VkFormat
radv_get_aspect_format(struct radv_image * image,VkImageAspectFlags mask)1359 radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
1360 {
1361 switch (mask) {
1362 case VK_IMAGE_ASPECT_PLANE_0_BIT:
1363 return image->planes[0].format;
1364 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1365 return image->planes[1].format;
1366 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1367 return image->planes[2].format;
1368 case VK_IMAGE_ASPECT_STENCIL_BIT:
1369 return vk_format_stencil_only(image->vk.format);
1370 case VK_IMAGE_ASPECT_DEPTH_BIT:
1371 return vk_format_depth_only(image->vk.format);
1372 case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
1373 return vk_format_depth_only(image->vk.format);
1374 default:
1375 return image->vk.format;
1376 }
1377 }
1378
1379 bool
radv_layout_is_htile_compressed(const struct radv_device * device,const struct radv_image * image,VkImageLayout layout,unsigned queue_mask)1380 radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image, VkImageLayout layout,
1381 unsigned queue_mask)
1382 {
1383 /* Don't compress exclusive images used on transfer queues when SDMA doesn't support HTILE.
1384 * Note that HTILE is already disabled on concurrent images when not supported.
1385 */
1386 if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !device->physical_device->rad_info.sdma_supports_compression)
1387 return false;
1388
1389 switch (layout) {
1390 case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
1391 case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL:
1392 case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL:
1393 case VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL:
1394 return radv_image_has_htile(image);
1395 case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
1396 return radv_image_is_tc_compat_htile(image) ||
1397 (radv_image_has_htile(image) && queue_mask == (1u << RADV_QUEUE_GENERAL));
1398 case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR:
1399 case VK_IMAGE_LAYOUT_GENERAL:
1400 /* It should be safe to enable TC-compat HTILE with
1401 * VK_IMAGE_LAYOUT_GENERAL if we are not in a render loop and
1402 * if the image doesn't have the storage bit set. This
1403 * improves performance for apps that use GENERAL for the main
1404 * depth pass because this allows compression and this reduces
1405 * the number of decompressions from/to GENERAL.
1406 */
1407 if (radv_image_is_tc_compat_htile(image) && queue_mask & (1u << RADV_QUEUE_GENERAL) &&
1408 !device->instance->drirc.disable_tc_compat_htile_in_general) {
1409 return true;
1410 } else {
1411 return false;
1412 }
1413 case VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT:
1414 /* Do not compress HTILE with feedback loops because we can't read&write it without
1415 * introducing corruption.
1416 */
1417 return false;
1418 case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
1419 case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL:
1420 if (radv_image_is_tc_compat_htile(image) ||
1421 (radv_image_has_htile(image) &&
1422 !(image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))) {
1423 /* Keep HTILE compressed if the image is only going to
1424 * be used as a depth/stencil read-only attachment.
1425 */
1426 return true;
1427 } else {
1428 return false;
1429 }
1430 break;
1431 default:
1432 return radv_image_is_tc_compat_htile(image);
1433 }
1434 }
1435
1436 bool
radv_layout_can_fast_clear(const struct radv_device * device,const struct radv_image * image,unsigned level,VkImageLayout layout,unsigned queue_mask)1437 radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image, unsigned level,
1438 VkImageLayout layout, unsigned queue_mask)
1439 {
1440 if (radv_dcc_enabled(image, level) && !radv_layout_dcc_compressed(device, image, level, layout, queue_mask))
1441 return false;
1442
1443 if (!(image->vk.usage & RADV_IMAGE_USAGE_WRITE_BITS))
1444 return false;
1445
1446 if (layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && layout != VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL)
1447 return false;
1448
1449 /* Exclusive images with CMASK or DCC can always be fast-cleared on the gfx queue. Concurrent
1450 * images can only be fast-cleared if comp-to-single is supported because we don't yet support
1451 * FCE on the compute queue.
1452 */
1453 return queue_mask == (1u << RADV_QUEUE_GENERAL) || radv_image_use_comp_to_single(device, image);
1454 }
1455
1456 bool
radv_layout_dcc_compressed(const struct radv_device * device,const struct radv_image * image,unsigned level,VkImageLayout layout,unsigned queue_mask)1457 radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image, unsigned level,
1458 VkImageLayout layout, unsigned queue_mask)
1459 {
1460 if (!radv_dcc_enabled(image, level))
1461 return false;
1462
1463 if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && queue_mask & (1u << RADV_QUEUE_FOREIGN))
1464 return true;
1465
1466 /* If the image is read-only, we can always just keep it compressed */
1467 if (!(image->vk.usage & RADV_IMAGE_USAGE_WRITE_BITS))
1468 return true;
1469
1470 /* Don't compress compute transfer dst when image stores are not supported. */
1471 if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) &&
1472 (queue_mask & (1u << RADV_QUEUE_COMPUTE)) && !radv_image_use_dcc_image_stores(device, image))
1473 return false;
1474
1475 /* Don't compress exclusive images used on transfer queues when SDMA doesn't support DCC.
1476 * Note that DCC is already disabled on concurrent images when not supported.
1477 */
1478 if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !device->physical_device->rad_info.sdma_supports_compression)
1479 return false;
1480
1481 if (layout == VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT) {
1482 /* Do not compress DCC with feedback loops because we can't read&write it without introducing
1483 * corruption.
1484 */
1485 return false;
1486 }
1487
1488 return device->physical_device->rad_info.gfx_level >= GFX10 || layout != VK_IMAGE_LAYOUT_GENERAL;
1489 }
1490
1491 enum radv_fmask_compression
radv_layout_fmask_compression(const struct radv_device * device,const struct radv_image * image,VkImageLayout layout,unsigned queue_mask)1492 radv_layout_fmask_compression(const struct radv_device *device, const struct radv_image *image, VkImageLayout layout,
1493 unsigned queue_mask)
1494 {
1495 if (!radv_image_has_fmask(image))
1496 return RADV_FMASK_COMPRESSION_NONE;
1497
1498 if (layout == VK_IMAGE_LAYOUT_GENERAL)
1499 return RADV_FMASK_COMPRESSION_NONE;
1500
1501 /* Don't compress compute transfer dst because image stores ignore FMASK and it needs to be
1502 * expanded before.
1503 */
1504 if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
1505 return RADV_FMASK_COMPRESSION_NONE;
1506
1507 /* Compress images if TC-compat CMASK is enabled. */
1508 if (radv_image_is_tc_compat_cmask(image))
1509 return RADV_FMASK_COMPRESSION_FULL;
1510
1511 switch (layout) {
1512 case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
1513 case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
1514 /* Don't compress images but no need to expand FMASK. */
1515 return RADV_FMASK_COMPRESSION_PARTIAL;
1516 case VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT:
1517 /* Don't compress images that are in feedback loops. */
1518 return RADV_FMASK_COMPRESSION_NONE;
1519 default:
1520 /* Don't compress images that are concurrent. */
1521 return queue_mask == (1u << RADV_QUEUE_GENERAL) ? RADV_FMASK_COMPRESSION_FULL : RADV_FMASK_COMPRESSION_NONE;
1522 }
1523 }
1524
1525 unsigned
radv_image_queue_family_mask(const struct radv_image * image,enum radv_queue_family family,enum radv_queue_family queue_family)1526 radv_image_queue_family_mask(const struct radv_image *image, enum radv_queue_family family,
1527 enum radv_queue_family queue_family)
1528 {
1529 if (!image->exclusive)
1530 return image->queue_family_mask;
1531 if (family == RADV_QUEUE_FOREIGN)
1532 return ((1u << RADV_MAX_QUEUE_FAMILIES) - 1u) | (1u << RADV_QUEUE_FOREIGN);
1533 if (family == RADV_QUEUE_IGNORED)
1534 return 1u << queue_family;
1535 return 1u << family;
1536 }
1537
1538 bool
radv_image_is_renderable(const struct radv_device * device,const struct radv_image * image)1539 radv_image_is_renderable(const struct radv_device *device, const struct radv_image *image)
1540 {
1541 if (image->vk.format == VK_FORMAT_R32G32B32_UINT || image->vk.format == VK_FORMAT_R32G32B32_SINT ||
1542 image->vk.format == VK_FORMAT_R32G32B32_SFLOAT)
1543 return false;
1544
1545 if (device->physical_device->rad_info.gfx_level >= GFX9 && image->vk.image_type == VK_IMAGE_TYPE_3D &&
1546 vk_format_get_blocksizebits(image->vk.format) == 128 && vk_format_is_compressed(image->vk.format))
1547 return false;
1548
1549 if (image->planes[0].surface.flags & RADEON_SURF_NO_RENDER_TARGET)
1550 return false;
1551
1552 return true;
1553 }
1554
1555 unsigned
radv_tile_mode_index(const struct radv_image_plane * plane,unsigned level,bool stencil)1556 radv_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
1557 {
1558 if (stencil)
1559 return plane->surface.u.legacy.zs.stencil_tiling_index[level];
1560 else
1561 return plane->surface.u.legacy.tiling_index[level];
1562 }
1563
VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateImage(VkDevice _device, const VkImageCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator,
                 VkImage *pImage)
{
   /* Entry point for vkCreateImage. Dispatches to platform-specific paths
    * (Android gralloc import, WSI swapchain images) before falling through to
    * the generic radv_image_create().
    */
#if DETECT_OS_ANDROID
   /* Android native buffers carry an existing allocation; import it instead of
    * creating a fresh image.
    */
   const VkNativeBufferANDROID *gralloc_info = vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);

   if (gralloc_info)
      return radv_image_from_gralloc(_device, pCreateInfo, gralloc_info, pAllocator, pImage);
#endif

#ifdef RADV_USE_WSI_PLATFORM
   /* Ignore swapchain creation info on Android. Since we don't have an implementation in Mesa,
    * we're guaranteed to access an Android object incorrectly.
    */
   RADV_FROM_HANDLE(radv_device, device, _device);
   const VkImageSwapchainCreateInfoKHR *swapchain_info =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_SWAPCHAIN_CREATE_INFO_KHR);
   if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) {
      return wsi_common_create_swapchain_image(device->physical_device->vk.wsi_device, pCreateInfo,
                                               swapchain_info->swapchain, pImage);
   }
#endif

   /* WSI can request a scanout-capable layout and/or mark the image as a prime
    * blit source; both flags are forwarded to image creation.
    */
   const struct wsi_image_create_info *wsi_info = vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
   bool scanout = wsi_info && wsi_info->scanout;
   bool prime_blit_src = wsi_info && wsi_info->blit_src;

   return radv_image_create(_device,
                            &(struct radv_image_create_info){
                               .vk_info = pCreateInfo,
                               .scanout = scanout,
                               .prime_blit_src = prime_blit_src,
                            },
                            pAllocator, pImage, false);
}
1600
1601 VKAPI_ATTR void VKAPI_CALL
radv_DestroyImage(VkDevice _device,VkImage _image,const VkAllocationCallbacks * pAllocator)1602 radv_DestroyImage(VkDevice _device, VkImage _image, const VkAllocationCallbacks *pAllocator)
1603 {
1604 RADV_FROM_HANDLE(radv_device, device, _device);
1605 RADV_FROM_HANDLE(radv_image, image, _image);
1606
1607 if (!image)
1608 return;
1609
1610 radv_destroy_image(device, pAllocator, image);
1611 }
1612
VKAPI_ATTR VkResult VKAPI_CALL
radv_BindImageMemory2(VkDevice _device, uint32_t bindInfoCount, const VkBindImageMemoryInfo *pBindInfos)
{
   /* Entry point for vkBindImageMemory2. For each bind info: handles swapchain
    * image aliasing, validates the bind fits within the allocation, and binds
    * either all planes (non-disjoint) or the single requested plane (disjoint).
    */
   RADV_FROM_HANDLE(radv_device, device, _device);

   for (uint32_t i = 0; i < bindInfoCount; ++i) {
      RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
      RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
      /* const is cast away because pResult is an output the caller expects filled. */
      VkBindMemoryStatusKHR *status = (void *)vk_find_struct_const(&pBindInfos[i], BIND_MEMORY_STATUS_KHR);

      if (status)
         *status->pResult = VK_SUCCESS;

      /* Ignore this struct on Android, we cannot access swapchain structures there. */
#ifdef RADV_USE_WSI_PLATFORM
      const VkBindImageMemorySwapchainInfoKHR *swapchain_info =
         vk_find_struct_const(pBindInfos[i].pNext, BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHR);

      if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) {
         /* Alias the referenced swapchain image's binding instead of using `mem`. */
         struct radv_image *swapchain_img =
            radv_image_from_handle(wsi_common_get_image(swapchain_info->swapchain, swapchain_info->imageIndex));

         image->bindings[0].bo = swapchain_img->bindings[0].bo;
         image->bindings[0].offset = swapchain_img->bindings[0].offset;
         continue;
      }
#endif

      if (mem->alloc_size) {
         /* Re-query the image's requirements so the bind can be validated
          * against the size of the memory object.
          */
         VkImageMemoryRequirementsInfo2 info = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
            .image = pBindInfos[i].image,
         };
         VkMemoryRequirements2 reqs = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
         };

         radv_GetImageMemoryRequirements2(_device, &info, &reqs);

         if (pBindInfos[i].memoryOffset + reqs.memoryRequirements.size > mem->alloc_size) {
            if (status)
               *status->pResult = VK_ERROR_UNKNOWN;
            return vk_errorf(device, VK_ERROR_UNKNOWN, "Device memory object too small for the image.\n");
         }
      }

      if (image->disjoint) {
         /* Disjoint images bind one plane per call, selected by planeAspect.
          * NOTE(review): plane_info is dereferenced unconditionally — the spec
          * makes VkBindImagePlaneMemoryInfo mandatory for disjoint binds, so a
          * missing struct is an application error; confirm this is intended.
          */
         const VkBindImagePlaneMemoryInfo *plane_info =
            vk_find_struct_const(pBindInfos[i].pNext, BIND_IMAGE_PLANE_MEMORY_INFO);

         switch (plane_info->planeAspect) {
         case VK_IMAGE_ASPECT_PLANE_0_BIT:
            image->bindings[0].bo = mem->bo;
            image->bindings[0].offset = pBindInfos[i].memoryOffset;
            break;
         case VK_IMAGE_ASPECT_PLANE_1_BIT:
            image->bindings[1].bo = mem->bo;
            image->bindings[1].offset = pBindInfos[i].memoryOffset;
            break;
         case VK_IMAGE_ASPECT_PLANE_2_BIT:
            image->bindings[2].bo = mem->bo;
            image->bindings[2].offset = pBindInfos[i].memoryOffset;
            break;
         default:
            break;
         }
      } else {
         /* Non-disjoint: a single binding covers every plane. */
         image->bindings[0].bo = mem->bo;
         image->bindings[0].offset = pBindInfos[i].memoryOffset;
      }
      /* Report the bind to the RMV memory tracer. */
      radv_rmv_log_image_bind(device, pBindInfos[i].image);
   }
   return VK_SUCCESS;
}
1687
VKAPI_ATTR void VKAPI_CALL
radv_GetImageSubresourceLayout2KHR(VkDevice _device, VkImage _image, const VkImageSubresource2KHR *pSubresource,
                                   VkSubresourceLayout2KHR *pLayout)
{
   /* Entry point for vkGetImageSubresourceLayout2KHR. Computes offset/pitch/
    * size of a subresource via one of three paths: DRM-modifier memory planes,
    * GFX9+ surfaces, or legacy (pre-GFX9) surfaces. Also fills
    * VkImageCompressionPropertiesEXT if chained.
    */
   RADV_FROM_HANDLE(radv_image, image, _image);
   RADV_FROM_HANDLE(radv_device, device, _device);
   int level = pSubresource->imageSubresource.mipLevel;
   int layer = pSubresource->imageSubresource.arrayLayer;

   const unsigned plane_count = vk_format_get_plane_count(image->vk.format);
   unsigned plane_id = 0;
   if (plane_count > 1)
      plane_id = radv_plane_from_aspect(pSubresource->imageSubresource.aspectMask);

   struct radv_image_plane *plane = &image->planes[plane_id];
   struct radeon_surf *surface = &plane->surface;

   if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && plane_count == 1) {
      /* Modifier images with one format plane: the aspect selects a *memory*
       * plane (e.g. main surface vs. DCC metadata) within the same surface.
       */
      unsigned mem_plane_id = radv_plane_from_aspect(pSubresource->imageSubresource.aspectMask);

      /* Modifier images are single-mip, single-layer here. */
      assert(level == 0);
      assert(layer == 0);

      pLayout->subresourceLayout.offset =
         ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level, surface, mem_plane_id, 0);
      pLayout->subresourceLayout.rowPitch =
         ac_surface_get_plane_stride(device->physical_device->rad_info.gfx_level, surface, mem_plane_id, level);
      pLayout->subresourceLayout.arrayPitch = 0;
      pLayout->subresourceLayout.depthPitch = 0;
      pLayout->subresourceLayout.size = ac_surface_get_plane_size(surface, mem_plane_id);
   } else if (device->physical_device->rad_info.gfx_level >= GFX9) {
      /* GFX9+: per-level offsets exist only for linear surfaces. */
      uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;

      pLayout->subresourceLayout.offset =
         ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level, &plane->surface, 0, layer) +
         level_offset;
      if (image->vk.format == VK_FORMAT_R32G32B32_UINT || image->vk.format == VK_FORMAT_R32G32B32_SINT ||
          image->vk.format == VK_FORMAT_R32G32B32_SFLOAT) {
         /* Adjust the number of bytes between each row because
          * the pitch is actually the number of components per
          * row.
          */
         pLayout->subresourceLayout.rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
      } else {
         uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;

         assert(util_is_power_of_two_nonzero(surface->bpe));
         pLayout->subresourceLayout.rowPitch = pitch * surface->bpe;
      }

      pLayout->subresourceLayout.arrayPitch = surface->u.gfx9.surf_slice_size;
      pLayout->subresourceLayout.depthPitch = surface->u.gfx9.surf_slice_size;
      pLayout->subresourceLayout.size = surface->u.gfx9.surf_slice_size;
      /* 3D images: total size spans all depth slices of this mip. */
      if (image->vk.image_type == VK_IMAGE_TYPE_3D)
         pLayout->subresourceLayout.size *= u_minify(image->vk.extent.depth, level);
   } else {
      /* Legacy (pre-GFX9): offsets are stored in 256B units, slice sizes in dwords. */
      pLayout->subresourceLayout.offset = (uint64_t)surface->u.legacy.level[level].offset_256B * 256 +
                                          (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
      pLayout->subresourceLayout.rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
      pLayout->subresourceLayout.arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      pLayout->subresourceLayout.depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      pLayout->subresourceLayout.size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      if (image->vk.image_type == VK_IMAGE_TYPE_3D)
         pLayout->subresourceLayout.size *= u_minify(image->vk.extent.depth, level);
   }

   /* Report HTILE/DCC presence as "default" compression if the app asked. */
   VkImageCompressionPropertiesEXT *image_compression_props =
      vk_find_struct(pLayout->pNext, IMAGE_COMPRESSION_PROPERTIES_EXT);
   if (image_compression_props) {
      image_compression_props->imageCompressionFixedRateFlags = VK_IMAGE_COMPRESSION_FIXED_RATE_NONE_EXT;

      if (image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
         image_compression_props->imageCompressionFlags =
            radv_image_has_htile(image) ? VK_IMAGE_COMPRESSION_DEFAULT_EXT : VK_IMAGE_COMPRESSION_DISABLED_EXT;
      } else {
         image_compression_props->imageCompressionFlags =
            radv_image_has_dcc(image) ? VK_IMAGE_COMPRESSION_DEFAULT_EXT : VK_IMAGE_COMPRESSION_DISABLED_EXT;
      }
   }
}
1768
1769 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetImageDrmFormatModifierPropertiesEXT(VkDevice _device,VkImage _image,VkImageDrmFormatModifierPropertiesEXT * pProperties)1770 radv_GetImageDrmFormatModifierPropertiesEXT(VkDevice _device, VkImage _image,
1771 VkImageDrmFormatModifierPropertiesEXT *pProperties)
1772 {
1773 RADV_FROM_HANDLE(radv_image, image, _image);
1774
1775 pProperties->drmFormatModifier = image->planes[0].surface.modifier;
1776 return VK_SUCCESS;
1777 }
1778