/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "ac_drm_fourcc.h"
#include "util/debug.h"
#include "util/u_atomic.h"
#include "vulkan/util/vk_format.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_radeon_winsys.h"
#include "sid.h"
#include "vk_format.h"
#include "vk_util.h"

#include "gfx10_format_table.h"

static unsigned
radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo,
                   VkFormat format)
{
   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
      assert(pCreateInfo->samples <= 1);
      return RADEON_SURF_MODE_LINEAR_ALIGNED;
   }

   /* MSAA resources must be 2D tiled. */
   if (pCreateInfo->samples > 1)
      return RADEON_SURF_MODE_2D;

   if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) &&
       device->physical_device->rad_info.gfx_level <= GFX8) {
      /* This linear fast path causes hangs in some VK CTS tests on GFX9,
       * hence the GFX8 cap above. */
      /* Textures with a very small height are recommended to be linear. */
      if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
          /* Only very thin and long 2D textures should benefit from
           * linear_aligned. */
          (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
         return RADEON_SURF_MODE_LINEAR_ALIGNED;
   }

   return RADEON_SURF_MODE_2D;
}

static bool
radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo,
                                   VkFormat format)
{
   /* TC-compat HTILE is only available for GFX8+. */
   if (device->physical_device->rad_info.gfx_level < GFX8)
      return false;

   if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
      return false;

   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* Do not enable TC-compatible HTILE if the image isn't readable by a
    * shader because no texture fetches will happen.
    */
   if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
                               VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
      return false;

   if (device->physical_device->rad_info.gfx_level < GFX9) {
      /* TC-compat HTILE for MSAA depth/stencil images is broken
       * on GFX8 because the tiling doesn't match.
       */
      if (pCreateInfo->samples >= 2 && format == VK_FORMAT_D32_SFLOAT_S8_UINT)
         return false;

      /* GFX9+ supports compression for both 32-bit and 16-bit depth
       * surfaces, while GFX8 only supports 32-bit natively. However,
       * the driver allows TC-compat HTILE for 16-bit depth surfaces
       * with no Z-plane compression.
       */
      if (format != VK_FORMAT_D32_SFLOAT_S8_UINT && format != VK_FORMAT_D32_SFLOAT &&
          format != VK_FORMAT_D16_UNORM)
         return false;
   }

   return true;
}

static bool
radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
{
   if (info->bo_metadata) {
      if (device->physical_device->rad_info.gfx_level >= GFX9)
         return info->bo_metadata->u.gfx9.scanout;
      else
         return info->bo_metadata->u.legacy.scanout;
   }

   return info->scanout;
}

static bool
radv_image_use_fast_clear_for_image_early(const struct radv_device *device,
                                          const struct radv_image *image)
{
   if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
      return true;

   if (image->info.samples <= 1 && image->info.width * image->info.height <= 512 * 512) {
      /* Do not enable CMASK or DCC for small surfaces where the cost
       * of the eliminate pass can be higher than the benefit of fast
       * clear. RadeonSI does this, but the image threshold is
       * different.
       */
      return false;
   }

   return !!(image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
}

static bool
radv_image_use_fast_clear_for_image(const struct radv_device *device,
                                    const struct radv_image *image)
{
   if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
      return true;

   return radv_image_use_fast_clear_for_image_early(device, image) &&
          (image->exclusive ||
           /* Enable DCC for concurrent images if stores are
            * supported because that means we can keep DCC compressed on
            * all layouts/queues.
            */
           radv_image_use_dcc_image_stores(device, image));
}

bool
radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext,
                                VkFormat format, VkImageCreateFlags flags, bool *sign_reinterpret)
{
   bool blendable;

   if (!radv_is_colorbuffer_format_supported(pdev, format, &blendable))
      return false;

   if (sign_reinterpret != NULL)
      *sign_reinterpret = false;

   if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
      const struct VkImageFormatListCreateInfo *format_list =
         (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(
            pNext, IMAGE_FORMAT_LIST_CREATE_INFO);

      /* We have to ignore the existence of the list if viewFormatCount = 0 */
      if (format_list && format_list->viewFormatCount) {
         /* compatibility is transitive, so we only need to check
          * one format with everything else. */
         for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
            if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
               continue;

            if (!radv_dcc_formats_compatible(pdev->rad_info.gfx_level, format,
                                             format_list->pViewFormats[i], sign_reinterpret))
               return false;
         }
      } else {
         return false;
      }
   }

   return true;
}

static bool
radv_format_is_atomic_allowed(struct radv_device *device, VkFormat format)
{
   if (format == VK_FORMAT_R32_SFLOAT && !device->image_float32_atomics)
      return false;

   return radv_is_atomic_format_supported(format);
}

static bool
radv_formats_is_atomic_allowed(struct radv_device *device, const void *pNext, VkFormat format,
                               VkImageCreateFlags flags)
{
   if (radv_format_is_atomic_allowed(device, format))
      return true;

   if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
      const struct VkImageFormatListCreateInfo *format_list =
         (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(
            pNext, IMAGE_FORMAT_LIST_CREATE_INFO);

      /* We have to ignore the existence of the list if viewFormatCount = 0 */
      if (format_list && format_list->viewFormatCount) {
         for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
            if (radv_format_is_atomic_allowed(device, format_list->pViewFormats[i]))
               return true;
         }
      }
   }

   return false;
}

static bool
radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *image,
                             const VkImageCreateInfo *pCreateInfo, VkFormat format,
                             bool *sign_reinterpret)
{
   /* DCC (Delta Color Compression) is only available for GFX8+. */
   if (device->physical_device->rad_info.gfx_level < GFX8)
      return false;

   if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
      return false;

   if (image->shareable && image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return false;

   /*
    * TODO: Enable DCC for storage images on GFX9 and earlier.
    *
    * Also disable DCC with atomics because even when DCC stores are
    * supported atomics will always decompress. So if we are
    * decompressing a lot anyway we might as well not have DCC.
    */
   if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
       (device->physical_device->rad_info.gfx_level < GFX10 ||
        radv_formats_is_atomic_allowed(device, pCreateInfo->pNext, format, pCreateInfo->flags)))
      return false;

   /* Do not enable DCC for fragment shading rate attachments. */
   if (pCreateInfo->usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)
      return false;

   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   if (vk_format_is_subsampled(format) || vk_format_get_plane_count(format) > 1)
      return false;

   if (!radv_image_use_fast_clear_for_image_early(device, image) &&
       image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return false;

   /* Do not enable DCC for mipmapped arrays because performance is worse. */
   if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
      return false;

   if (device->physical_device->rad_info.gfx_level < GFX10) {
      /* TODO: Add support for DCC MSAA on GFX8-9. */
      if (pCreateInfo->samples > 1 && !device->physical_device->dcc_msaa_allowed)
         return false;

      /* TODO: Add support for DCC layers/mipmaps on GFX9. */
      if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
          device->physical_device->rad_info.gfx_level == GFX9)
         return false;
   }

   return radv_are_formats_dcc_compatible(device->physical_device, pCreateInfo->pNext, format,
                                          pCreateInfo->flags, sign_reinterpret);
}

static bool
radv_use_dcc_for_image_late(struct radv_device *device, struct radv_image *image)
{
   if (!radv_image_has_dcc(image))
      return false;

   if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return true;

   if (!radv_image_use_fast_clear_for_image(device, image))
      return false;

   /* TODO: Fix storage images with DCC without DCC image stores.
    * Disabling it for now. */
   if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
       !radv_image_use_dcc_image_stores(device, image))
      return false;

   return true;
}

/*
 * Whether to enable image stores with DCC compression for this image. If
 * this function returns false, the image subresource should be decompressed
 * before using it with image stores.
 *
 * Note that this can have mixed performance implications, see
 * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6796#note_643299
 *
 * This function assumes the image uses DCC compression.
 */
bool
radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image)
{
   return ac_surface_supports_dcc_image_stores(device->physical_device->rad_info.gfx_level,
                                               &image->planes[0].surface);
}

/*
 * Whether to use a predicate to determine whether DCC is in a compressed
 * state. This can be used to avoid decompressing an image multiple times.
 */
bool
radv_image_use_dcc_predication(const struct radv_device *device, const struct radv_image *image)
{
   return radv_image_has_dcc(image) && !radv_image_use_dcc_image_stores(device, image);
}

static inline bool
radv_use_fmask_for_image(const struct radv_device *device, const struct radv_image *image)
{
   return image->info.samples > 1 && ((image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
                                      (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
}

static inline bool
radv_use_htile_for_image(const struct radv_device *device, const struct radv_image *image)
{
   /* TODO:
    * - Investigate mips + layers.
    * - Enable on other gens.
    */
   bool use_htile_for_mips =
      image->info.array_size == 1 && device->physical_device->rad_info.gfx_level >= GFX10;

   /* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */
   if (device->physical_device->rad_info.gfx_level == GFX10 &&
       image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT && image->info.levels > 1)
      return false;

   /* Do not enable HTILE for very small images because it seems less performant,
    * but make sure it's allowed with VRS attachments because we need HTILE.
    */
   if (image->info.width * image->info.height < 8 * 8 &&
       !(device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS) &&
       !device->attachment_vrs_enabled)
      return false;

   return (image->info.levels == 1 || use_htile_for_mips) && !image->shareable;
}

static bool
radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image *image)
{
   /* TC-compat CMASK is only available for GFX8+. */
   if (device->physical_device->rad_info.gfx_level < GFX8)
      return false;

   if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK)
      return false;

   /* TC-compat CMASK with storage images is only supported on GFX10+. */
   if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
       device->physical_device->rad_info.gfx_level < GFX10)
      return false;

   /* Do not enable TC-compatible CMASK if the image isn't readable by a
    * shader because no texture fetches will happen.
    */
   if (!(image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
                            VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
      return false;

   /* If the image doesn't have FMASK, it can't be fetchable. */
   if (!radv_image_has_fmask(image))
      return false;

   return true;
}

static uint32_t
si_get_bo_metadata_word1(const struct radv_device *device)
{
   return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
}

static bool
radv_is_valid_opaque_metadata(const struct radv_device *device, const struct radeon_bo_metadata *md)
{
   if (md->metadata[0] != 1 || md->metadata[1] != si_get_bo_metadata_word1(device))
      return false;

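   /* Version 1 of the opaque metadata is at least 2 header dwords plus the
    * 8-dword image descriptor, i.e. 40 bytes; anything smaller is invalid.
    */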
   if (md->size_metadata < 40)
      return false;

   return true;
}

static void
radv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf *surface,
                                 const struct radeon_bo_metadata *md)
{
   surface->flags = RADEON_SURF_CLR(surface->flags, MODE);

   if (device->physical_device->rad_info.gfx_level >= GFX9) {
      if (md->u.gfx9.swizzle_mode > 0)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

      surface->u.gfx9.swizzle_mode = md->u.gfx9.swizzle_mode;
   } else {
      surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
      surface->u.legacy.bankw = md->u.legacy.bankw;
      surface->u.legacy.bankh = md->u.legacy.bankh;
      surface->u.legacy.tile_split = md->u.legacy.tile_split;
      surface->u.legacy.mtilea = md->u.legacy.mtilea;
      surface->u.legacy.num_banks = md->u.legacy.num_banks;

      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
   }
}

static VkResult
radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image,
                            const struct radv_image_create_info *create_info,
                            struct ac_surf_info *image_info)
{
   unsigned width = image->info.width;
   unsigned height = image->info.height;

   /*
    * minigbm sometimes allocates bigger images, which is going to result in
    * weird strides and other properties. Let's be lenient where possible and
    * fail it on GFX10 (as we cannot cope there).
    *
    * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
    */
   if (create_info->bo_metadata &&
       radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
      const struct radeon_bo_metadata *md = create_info->bo_metadata;

      if (device->physical_device->rad_info.gfx_level >= GFX10) {
         width = G_00A004_WIDTH_LO(md->metadata[3]) + (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
         height = G_00A008_HEIGHT(md->metadata[4]) + 1;
      } else {
         width = G_008F18_WIDTH(md->metadata[4]) + 1;
         height = G_008F18_HEIGHT(md->metadata[4]) + 1;
      }
   }

   if (image->info.width == width && image->info.height == height)
      return VK_SUCCESS;

   if (width < image->info.width || height < image->info.height) {
      fprintf(stderr,
              "The imported image has smaller dimensions than the internal\n"
              "dimensions. Using it is going to fail badly, so we reject\n"
              "this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
      fprintf(stderr,
              "Tried to import an image with inconsistent width on GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency in width and will fail this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else {
      fprintf(stderr,
              "Tried to import an image with inconsistent width on pre-GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency and would fail on GFX10.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
   }
   image_info->width = width;
   image_info->height = height;

   return VK_SUCCESS;
}

static VkResult
radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *image,
                                 const struct radv_image_create_info *create_info,
                                 struct ac_surf_info *image_info)
{
   VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned plane = 0; plane < image->plane_count; ++plane) {
      if (create_info->bo_metadata) {
         radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
                                          create_info->bo_metadata);
      }

      if (radv_surface_has_scanout(device, create_info)) {
         image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
         if (device->instance->debug_flags & RADV_DEBUG_NO_DISPLAY_DCC)
            image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;

         image->info.surf_index = NULL;
      }

      if (create_info->prime_blit_src && device->physical_device->rad_info.gfx_level == GFX9) {
         /* Older SDMA hw can't handle DCC */
         image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
      }
   }
   return VK_SUCCESS;
}

static VkFormat
etc2_emulation_format(VkFormat format)
{
   switch (format) {
   case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
      return VK_FORMAT_R8G8B8A8_UNORM;
   case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
      return VK_FORMAT_R8G8B8A8_SRGB;
   case VK_FORMAT_EAC_R11_UNORM_BLOCK:
      return VK_FORMAT_R16_UNORM;
   case VK_FORMAT_EAC_R11_SNORM_BLOCK:
      return VK_FORMAT_R16_SNORM;
   case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
      return VK_FORMAT_R16G16_UNORM;
   case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
      return VK_FORMAT_R16G16_SNORM;
   default:
      unreachable("Unhandled ETC format");
   }
}

static VkFormat
radv_image_get_plane_format(const struct radv_physical_device *pdev, const struct radv_image *image,
                            unsigned plane)
{
   if (pdev->emulate_etc2 &&
       vk_format_description(image->vk.format)->layout == UTIL_FORMAT_LAYOUT_ETC) {
      if (plane == 0)
         return image->vk.format;
      return etc2_emulation_format(image->vk.format);
   }
   return vk_format_get_plane_format(image->vk.format, plane);
}

static uint64_t
radv_get_surface_flags(struct radv_device *device, struct radv_image *image, unsigned plane_id,
                       const VkImageCreateInfo *pCreateInfo, VkFormat image_format)
{
   uint64_t flags;
   unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
   VkFormat format = radv_image_get_plane_format(device->physical_device, image, plane_id);
   const struct util_format_description *desc = vk_format_description(format);
   bool is_depth, is_stencil;

   is_depth = util_format_has_depth(desc);
   is_stencil = util_format_has_stencil(desc);

   flags = RADEON_SURF_SET(array_mode, MODE);

   switch (pCreateInfo->imageType) {
   case VK_IMAGE_TYPE_1D:
      if (pCreateInfo->arrayLayers > 1)
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
      else
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
      break;
   case VK_IMAGE_TYPE_2D:
      if (pCreateInfo->arrayLayers > 1)
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
      else
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
      break;
   case VK_IMAGE_TYPE_3D:
      flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
      break;
   default:
      unreachable("unhandled image type");
   }

   /* Required for clearing/initializing a specific layer on GFX8. */
   flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;

   if (is_depth) {
      flags |= RADEON_SURF_ZBUFFER;

      if (radv_use_htile_for_image(device, image) &&
          !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
         if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
            flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
      } else {
         flags |= RADEON_SURF_NO_HTILE;
      }
   }

   if (is_stencil)
      flags |= RADEON_SURF_SBUFFER;

   if (device->physical_device->rad_info.gfx_level >= GFX9 &&
       pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
       vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format))
      flags |= RADEON_SURF_NO_RENDER_TARGET;

   if (!radv_use_dcc_for_image_early(device, image, pCreateInfo, image_format,
                                     &image->dcc_sign_reinterpret))
      flags |= RADEON_SURF_DISABLE_DCC;

   if (!radv_use_fmask_for_image(device, image))
      flags |= RADEON_SURF_NO_FMASK;

   if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
      flags |=
         RADEON_SURF_PRT | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE | RADEON_SURF_DISABLE_DCC;
   }

   return flags;
}

static inline unsigned
si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
{
   if (stencil)
      return plane->surface.u.legacy.zs.stencil_tiling_index[level];
   else
      return plane->surface.u.legacy.tiling_index[level];
}

static unsigned
radv_map_swizzle(unsigned swizzle)
{
   switch (swizzle) {
   case PIPE_SWIZZLE_Y:
      return V_008F0C_SQ_SEL_Y;
   case PIPE_SWIZZLE_Z:
      return V_008F0C_SQ_SEL_Z;
   case PIPE_SWIZZLE_W:
      return V_008F0C_SQ_SEL_W;
   case PIPE_SWIZZLE_0:
      return V_008F0C_SQ_SEL_0;
   case PIPE_SWIZZLE_1:
      return V_008F0C_SQ_SEL_1;
   default: /* PIPE_SWIZZLE_X */
      return V_008F0C_SQ_SEL_X;
   }
}

static void
radv_compose_swizzle(const struct util_format_description *desc, const VkComponentMapping *mapping,
                     enum pipe_swizzle swizzle[4])
{
   if (desc->format == PIPE_FORMAT_R64_UINT || desc->format == PIPE_FORMAT_R64_SINT) {
      /* 64-bit formats only support storage images and storage images
       * require identity component mappings. We use 32-bit
       * instructions to access 64-bit images, so we need a special
       * case here.
       *
       * The zw components are 1,0 so that they can easily be used
       * by loads to create the w component, which has to be 0 for
       * NULL descriptors.
       */
      swizzle[0] = PIPE_SWIZZLE_X;
      swizzle[1] = PIPE_SWIZZLE_Y;
      swizzle[2] = PIPE_SWIZZLE_1;
      swizzle[3] = PIPE_SWIZZLE_0;
   } else if (!mapping) {
      for (unsigned i = 0; i < 4; i++)
         swizzle[i] = desc->swizzle[i];
   } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
      const unsigned char swizzle_xxxx[4] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0,
                                             PIPE_SWIZZLE_1};
      vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
   } else {
      vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
   }
}

static void
radv_make_buffer_descriptor(struct radv_device *device, struct radv_buffer *buffer,
                            VkFormat vk_format, unsigned offset, unsigned range, uint32_t *state)
{
   const struct util_format_description *desc;
   unsigned stride;
   uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
   uint64_t va = gpu_address + buffer->offset;
   unsigned num_format, data_format;
   int first_non_void;
   enum pipe_swizzle swizzle[4];
   desc = vk_format_description(vk_format);
   first_non_void = vk_format_get_first_non_void_channel(vk_format);
   stride = desc->block.bits / 8;

   radv_compose_swizzle(desc, NULL, swizzle);

   va += offset;
   state[0] = va;
   state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);

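   /* NUM_RECORDS is expressed in units of the stride when one is set, except
    * on GFX8 where the range is kept in bytes.
    */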
   if (device->physical_device->rad_info.gfx_level != GFX8 && stride) {
      range /= stride;
   }

   state[2] = range;
   state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
              S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
              S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
              S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3]));

   if (device->physical_device->rad_info.gfx_level >= GFX10) {
      const struct gfx10_format *fmt =
         &ac_get_gfx10_format_table(&device->physical_device->rad_info)[vk_format_to_pipe_format(vk_format)];

      /* OOB_SELECT chooses the out-of-bounds check:
       * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
       * - 1: index >= NUM_RECORDS
       * - 2: NUM_RECORDS == 0
       * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
       *      else: swizzle_address >= NUM_RECORDS
       */
      state[3] |= S_008F0C_FORMAT(fmt->img_format) |
                  S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
                  S_008F0C_RESOURCE_LEVEL(device->physical_device->rad_info.gfx_level < GFX11);
   } else {
      num_format = radv_translate_buffer_numformat(desc, first_non_void);
      data_format = radv_translate_buffer_dataformat(desc, first_non_void);

      assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
      assert(num_format != ~0);

      state[3] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format);
   }
}

static void
si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *image,
                               const struct legacy_surf_level *base_level_info, unsigned plane_id,
                               unsigned base_level, unsigned first_level, unsigned block_width,
                               bool is_stencil, bool is_storage_image, bool disable_compression,
                               bool enable_write_compression, uint32_t *state)
{
   struct radv_image_plane *plane = &image->planes[plane_id];
   struct radv_image_binding *binding = image->disjoint ? &image->bindings[plane_id] : &image->bindings[0];
   uint64_t gpu_address = binding->bo ? radv_buffer_get_va(binding->bo) + binding->offset : 0;
   uint64_t va = gpu_address;
   enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
   uint64_t meta_va = 0;
   if (gfx_level >= GFX9) {
      if (is_stencil)
         va += plane->surface.u.gfx9.zs.stencil_offset;
      else
         va += plane->surface.u.gfx9.surf_offset;
   } else
      va += (uint64_t)base_level_info->offset_256B * 256;

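   /* The descriptor stores the base address in 256-byte units, which leaves
    * the low 8 bits free to hold the tile swizzle.
    */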
   state[0] = va >> 8;
   if (gfx_level >= GFX9 || base_level_info->mode == RADEON_SURF_MODE_2D)
      state[0] |= plane->surface.tile_swizzle;
   state[1] &= C_008F14_BASE_ADDRESS_HI;
   state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

   if (gfx_level >= GFX8) {
      state[6] &= C_008F28_COMPRESSION_EN;
      state[7] = 0;
      if (!disable_compression && radv_dcc_enabled(image, first_level)) {
         meta_va = gpu_address + plane->surface.meta_offset;
         if (gfx_level <= GFX8)
            meta_va += plane->surface.u.legacy.color.dcc_level[base_level].dcc_offset;

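         /* Fold the tile swizzle into the DCC address; it may only touch
          * bits below the metadata alignment.
          */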
         unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
         dcc_tile_swizzle &= (1 << plane->surface.meta_alignment_log2) - 1;
         meta_va |= dcc_tile_swizzle;
      } else if (!disable_compression && radv_image_is_tc_compat_htile(image)) {
         meta_va = gpu_address + plane->surface.meta_offset;
      }

      if (meta_va) {
         state[6] |= S_008F28_COMPRESSION_EN(1);
         if (gfx_level <= GFX9)
            state[7] = meta_va >> 8;
      }
   }

   if (gfx_level >= GFX10) {
      state[3] &= C_00A00C_SW_MODE;

      if (is_stencil) {
         state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.zs.stencil_swizzle_mode);
      } else {
         state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.swizzle_mode);
      }

      state[6] &= C_00A018_META_DATA_ADDRESS_LO & C_00A018_META_PIPE_ALIGNED;

      if (meta_va) {
         struct gfx9_surf_meta_flags meta = {
            .rb_aligned = 1,
            .pipe_aligned = 1,
         };

         if (!(plane->surface.flags & RADEON_SURF_Z_OR_SBUFFER))
            meta = plane->surface.u.gfx9.color.dcc;

         if (radv_dcc_enabled(image, first_level) && is_storage_image && enable_write_compression)
            state[6] |= S_00A018_WRITE_COMPRESS_ENABLE(1);

         state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
                     S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
      }

      state[7] = meta_va >> 16;
   } else if (gfx_level == GFX9) {
      state[3] &= C_008F1C_SW_MODE;
      state[4] &= C_008F20_PITCH;

      if (is_stencil) {
         state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.zs.stencil_swizzle_mode);
         state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.zs.stencil_epitch);
      } else {
         state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.swizzle_mode);
         state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.epitch);
      }

      state[5] &=
         C_008F24_META_DATA_ADDRESS & C_008F24_META_PIPE_ALIGNED & C_008F24_META_RB_ALIGNED;
      if (meta_va) {
         struct gfx9_surf_meta_flags meta = {
            .rb_aligned = 1,
            .pipe_aligned = 1,
         };

         if (!(plane->surface.flags & RADEON_SURF_Z_OR_SBUFFER))
            meta = plane->surface.u.gfx9.color.dcc;

         state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
                     S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
                     S_008F24_META_RB_ALIGNED(meta.rb_aligned);
      }
   } else {
      /* GFX6-GFX8 */
      unsigned pitch = base_level_info->nblk_x * block_width;
      unsigned index = si_tile_mode_index(plane, base_level, is_stencil);

      state[3] &= C_008F1C_TILING_INDEX;
      state[3] |= S_008F1C_TILING_INDEX(index);
      state[4] &= C_008F20_PITCH;
      state[4] |= S_008F20_PITCH(pitch - 1);
   }
}

static unsigned
radv_tex_dim(VkImageType image_type, VkImageViewType view_type, unsigned nr_layers,
             unsigned nr_samples, bool is_storage_image, bool gfx9)
{
   if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
      return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;

   /* GFX9 allocates 1D textures as 2D. */
   if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
      image_type = VK_IMAGE_TYPE_2D;
   switch (image_type) {
   case VK_IMAGE_TYPE_1D:
      return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
   case VK_IMAGE_TYPE_2D:
      if (nr_samples > 1)
         return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
      else
         return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
   case VK_IMAGE_TYPE_3D:
      if (view_type == VK_IMAGE_VIEW_TYPE_3D)
         return V_008F1C_SQ_RSRC_IMG_3D;
      else
         return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
   default:
      unreachable("illegal image type");
   }
}

static unsigned
gfx9_border_color_swizzle(const struct util_format_description *desc)
{
   unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;

   if (desc->swizzle[3] == PIPE_SWIZZLE_X) {
      /* For the pre-defined border color values (white, opaque
       * black, transparent black), the only thing that matters is
       * that the alpha channel winds up in the correct place
       * (because the RGB channels are all the same) so either of
       * these enumerations will work.
       */
      if (desc->swizzle[2] == PIPE_SWIZZLE_Y)
         bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
      else
         bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
   } else if (desc->swizzle[0] == PIPE_SWIZZLE_X) {
      if (desc->swizzle[1] == PIPE_SWIZZLE_Y)
         bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
      else
         bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
   } else if (desc->swizzle[1] == PIPE_SWIZZLE_X) {
      bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
   } else if (desc->swizzle[2] == PIPE_SWIZZLE_X) {
      bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
   }

   return bc_swizzle;
}

bool
vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
{
   const struct util_format_description *desc = vk_format_description(format);

   if (device->physical_device->rad_info.gfx_level >= GFX10 && desc->nr_channels == 1)
      return desc->swizzle[3] == PIPE_SWIZZLE_X;

   return radv_translate_colorswap(format, false) <= 1;
}
/**
 * Build the sampler view descriptor for a texture (GFX10).
 */
static void
gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
                              bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
                              const VkComponentMapping *mapping, unsigned first_level,
                              unsigned last_level, unsigned first_layer, unsigned last_layer,
                              unsigned width, unsigned height, unsigned depth, float min_lod,
                              uint32_t *state, uint32_t *fmask_state,
                              VkImageCreateFlags img_create_flags)
{
   const struct util_format_description *desc;
   enum pipe_swizzle swizzle[4];
   unsigned img_format;
   unsigned type;

   desc = vk_format_description(vk_format);

   /* For emulated ETC2 without alpha we need to override the format to a 3-component format, so
    * that border colors work correctly (alpha forced to 1). Since Vulkan has no such format,
    * this uses the Gallium formats to set the description. */
   if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK &&
       vk_format == VK_FORMAT_R8G8B8A8_UNORM) {
      desc = util_format_description(PIPE_FORMAT_R8G8B8X8_UNORM);
   } else if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK &&
              vk_format == VK_FORMAT_R8G8B8A8_SRGB) {
      desc = util_format_description(PIPE_FORMAT_R8G8B8X8_SRGB);
   }

   img_format =
      ac_get_gfx10_format_table(&device->physical_device->rad_info)[vk_format_to_pipe_format(vk_format)].img_format;

   radv_compose_swizzle(desc, mapping, swizzle);

   if (img_create_flags & VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT) {
      assert(image->vk.image_type == VK_IMAGE_TYPE_3D);
      type = V_008F1C_SQ_RSRC_IMG_3D;
   } else {
      type = radv_tex_dim(image->vk.image_type, view_type, image->info.array_size, image->info.samples,
                          is_storage_image, device->physical_device->rad_info.gfx_level == GFX9);
   }

   if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
      height = 1;
      depth = image->info.array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
      if (view_type != VK_IMAGE_VIEW_TYPE_3D)
         depth = image->info.array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
      depth = image->info.array_size / 6;

   state[0] = 0;
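   /* MIN_LOD is an unsigned 4.8 fixed-point value, hence the clamp to
    * [0, 15] and the 8 fractional bits.
    */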
   state[1] = S_00A004_MIN_LOD(radv_float_to_ufixed(CLAMP(min_lod, 0, 15), 8)) |
              S_00A004_FORMAT(img_format) |
              S_00A004_WIDTH_LO(width - 1);
   state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
              S_00A008_RESOURCE_LEVEL(device->physical_device->rad_info.gfx_level < GFX11);
   state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
              S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
              S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
              S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
              S_00A00C_BASE_LEVEL(image->info.samples > 1 ? 0 : first_level) |
              S_00A00C_LAST_LEVEL(image->info.samples > 1 ? util_logbase2(image->info.samples)
                                                          : last_level) |
              S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc)) | S_00A00C_TYPE(type);
   /* Depth is the last accessible layer on gfx9+. The hw doesn't need
    * to know the total number of layers.
    */
   state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
              S_00A010_BASE_ARRAY(first_layer);
   state[5] = S_00A014_ARRAY_PITCH(0) |
              S_00A014_PERF_MOD(4);
   state[6] = 0;
   state[7] = 0;

   if (img_create_flags & VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT) {
      assert(type == V_008F1C_SQ_RSRC_IMG_3D);

      /* ARRAY_PITCH is only meaningful for 3D images, 0 means SRV, 1 means UAV.
       * In SRV mode, BASE_ARRAY is ignored and DEPTH is the last slice of mipmap level 0.
       * In UAV mode, BASE_ARRAY is the first slice and DEPTH is the last slice of the bound level.
       */
      state[4] &= C_00A010_DEPTH;
      state[4] |= S_00A010_DEPTH(!is_storage_image ? depth - 1 : u_minify(depth, first_level) - 1);
      state[5] |= S_00A014_ARRAY_PITCH(is_storage_image);
   }

   unsigned max_mip =
      image->info.samples > 1 ? util_logbase2(image->info.samples) : image->info.levels - 1;

   if (device->physical_device->rad_info.gfx_level >= GFX11) {
      state[1] |= S_00A004_MAX_MIP(max_mip);
   } else {
      state[5] |= S_00A014_MAX_MIP(max_mip);
   }

   if (radv_dcc_enabled(image, first_level)) {
      state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
                  S_00A018_MAX_COMPRESSED_BLOCK_SIZE(
                     image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size) |
                  S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
   }

   if (radv_image_get_iterate256(device, image)) {
      state[6] |= S_00A018_ITERATE_256(1);
   }

   /* Initialize the sampler view for FMASK. */
   if (fmask_state) {
      if (radv_image_has_fmask(image)) {
         uint64_t gpu_address = radv_buffer_get_va(image->bindings[0].bo);
         uint32_t format;
         uint64_t va;

         assert(image->plane_count == 1);

         va = gpu_address + image->bindings[0].offset + image->planes[0].surface.fmask_offset;

         switch (image->info.samples) {
         case 2:
            format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F2;
            break;
         case 4:
            format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F4;
            break;
         case 8:
            format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F8;
            break;
         default:
            unreachable("invalid nr_samples");
         }

         fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
         fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | S_00A004_FORMAT(format) |
                          S_00A004_WIDTH_LO(width - 1);
         fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
                          S_00A008_RESOURCE_LEVEL(1);
         fmask_state[3] =
            S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
            S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
            S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode) |
            S_00A00C_TYPE(
               radv_tex_dim(image->vk.image_type, view_type, image->info.array_size, 0, false, false));
         fmask_state[4] = S_00A010_DEPTH(last_layer) | S_00A010_BASE_ARRAY(first_layer);
         fmask_state[5] = 0;
         fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
         fmask_state[7] = 0;

         if (radv_image_is_tc_compat_cmask(image)) {
            va = gpu_address + image->bindings[0].offset + image->planes[0].surface.cmask_offset;

            fmask_state[6] |= S_00A018_COMPRESSION_EN(1);
            fmask_state[6] |= S_00A018_META_DATA_ADDRESS_LO(va >> 8);
            fmask_state[7] |= va >> 16;
         }
      } else
         memset(fmask_state, 0, 8 * 4);
   }
}

/**
 * Build the sampler view descriptor for a texture (SI-GFX9)
 */
static void
si_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
                           bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
                           const VkComponentMapping *mapping, unsigned first_level,
                           unsigned last_level, unsigned first_layer, unsigned last_layer,
                           unsigned width, unsigned height, unsigned depth, float min_lod,
                           uint32_t *state, uint32_t *fmask_state,
                           VkImageCreateFlags img_create_flags)
{
   const struct util_format_description *desc;
   enum pipe_swizzle swizzle[4];
   int first_non_void;
   unsigned num_format, data_format, type;

   desc = vk_format_description(vk_format);

   /* For emulated ETC2 without alpha we need to override the format to a 3-component format, so
    * that border colors work correctly (alpha forced to 1). Since Vulkan has no such format,
    * this uses the Gallium formats to set the description. */
   if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK &&
       vk_format == VK_FORMAT_R8G8B8A8_UNORM) {
      desc = util_format_description(PIPE_FORMAT_R8G8B8X8_UNORM);
   } else if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK &&
              vk_format == VK_FORMAT_R8G8B8A8_SRGB) {
      desc = util_format_description(PIPE_FORMAT_R8G8B8X8_SRGB);
   }

   radv_compose_swizzle(desc, mapping, swizzle);

   first_non_void = vk_format_get_first_non_void_channel(vk_format);

   num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
   if (num_format == ~0) {
      num_format = 0;
   }

   data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
   if (data_format == ~0) {
      data_format = 0;
   }

   /* S8 with either Z16 or Z32 HTILE needs a special format. */
   if (device->physical_device->rad_info.gfx_level == GFX9 && vk_format == VK_FORMAT_S8_UINT &&
       radv_image_is_tc_compat_htile(image)) {
      if (image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT)
         data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
      else if (image->vk.format == VK_FORMAT_D16_UNORM_S8_UINT)
         data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
   }

   if (device->physical_device->rad_info.gfx_level == GFX9 &&
       img_create_flags & VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT) {
      assert(image->vk.image_type == VK_IMAGE_TYPE_3D);
      type = V_008F1C_SQ_RSRC_IMG_3D;
   } else {
      type = radv_tex_dim(image->vk.image_type, view_type, image->info.array_size, image->info.samples,
                          is_storage_image, device->physical_device->rad_info.gfx_level == GFX9);
   }

   if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
      height = 1;
      depth = image->info.array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
      if (view_type != VK_IMAGE_VIEW_TYPE_3D)
         depth = image->info.array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
      depth = image->info.array_size / 6;

   state[0] = 0;
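   /* As on GFX10, MIN_LOD is an unsigned 4.8 fixed-point value. */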
   state[1] = (S_008F14_MIN_LOD(radv_float_to_ufixed(CLAMP(min_lod, 0, 15), 8)) |
               S_008F14_DATA_FORMAT(data_format) |
               S_008F14_NUM_FORMAT(num_format));
   state[2] = (S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1) | S_008F18_PERF_MOD(4));
   state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
               S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
               S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
               S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
               S_008F1C_BASE_LEVEL(image->info.samples > 1 ? 0 : first_level) |
               S_008F1C_LAST_LEVEL(image->info.samples > 1 ? util_logbase2(image->info.samples)
                                                           : last_level) |
               S_008F1C_TYPE(type));
   state[4] = 0;
   state[5] = S_008F24_BASE_ARRAY(first_layer);
   state[6] = 0;
   state[7] = 0;

   if (device->physical_device->rad_info.gfx_level == GFX9) {
      unsigned bc_swizzle = gfx9_border_color_swizzle(desc);

      /* Depth is the last accessible layer on Gfx9.
       * The hw doesn't need to know the total number of layers.
       */
      if (type == V_008F1C_SQ_RSRC_IMG_3D)
         state[4] |= S_008F20_DEPTH(depth - 1);
      else
         state[4] |= S_008F20_DEPTH(last_layer);

      state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
      state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ? util_logbase2(image->info.samples)
                                                           : image->info.levels - 1);
   } else {
      state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
      state[4] |= S_008F20_DEPTH(depth - 1);
      state[5] |= S_008F24_LAST_ARRAY(last_layer);
   }
   if (!(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) &&
       image->planes[0].surface.meta_offset) {
      state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
   } else {
      if (device->instance->disable_aniso_single_level) {
         /* The last dword is unused by hw. The shader uses it to clear
          * bits in the first dword of sampler state.
          */
         if (device->physical_device->rad_info.gfx_level <= GFX7 && image->info.samples <= 1) {
            if (first_level == last_level)
               state[7] = C_008F30_MAX_ANISO_RATIO;
            else
               state[7] = 0xffffffff;
         }
      }
   }

   /* Initialize the sampler view for FMASK. */
   if (fmask_state) {
      if (radv_image_has_fmask(image)) {
         uint32_t fmask_format;
         uint64_t gpu_address = radv_buffer_get_va(image->bindings[0].bo);
         uint64_t va;

         assert(image->plane_count == 1);

         va = gpu_address + image->bindings[0].offset + image->planes[0].surface.fmask_offset;

         if (device->physical_device->rad_info.gfx_level == GFX9) {
            fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
            switch (image->info.samples) {
            case 2:
               num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2;
               break;
            case 4:
               num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4;
               break;
            case 8:
               num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8;
               break;
            default:
               unreachable("invalid nr_samples");
            }
         } else {
            switch (image->info.samples) {
            case 2:
               fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
               break;
            case 4:
               fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
               break;
            case 8:
               fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
               break;
            default:
               assert(0);
               fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
            }
            num_format = V_008F14_IMG_NUM_FORMAT_UINT;
         }

         fmask_state[0] = va >> 8;
         fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
         fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | S_008F14_DATA_FORMAT(fmask_format) |
                          S_008F14_NUM_FORMAT(num_format);
         fmask_state[2] = S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1);
         fmask_state[3] =
            S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
            S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
            S_008F1C_TYPE(
               radv_tex_dim(image->vk.image_type, view_type, image->info.array_size, 0, false, false));
         fmask_state[4] = 0;
         fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
         fmask_state[6] = 0;
         fmask_state[7] = 0;

         if (device->physical_device->rad_info.gfx_level == GFX9) {
            fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode);
            fmask_state[4] |= S_008F20_DEPTH(last_layer) |
                              S_008F20_PITCH(image->planes[0].surface.u.gfx9.color.fmask_epitch);
            fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) | S_008F24_META_RB_ALIGNED(1);

            if (radv_image_is_tc_compat_cmask(image)) {
               va = gpu_address + image->bindings[0].offset + image->planes[0].surface.cmask_offset;

               fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
               fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
               fmask_state[7] |= va >> 8;
            }
         } else {
            fmask_state[3] |=
               S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.color.fmask.tiling_index);
            fmask_state[4] |=
               S_008F20_DEPTH(depth - 1) |
               S_008F20_PITCH(image->planes[0].surface.u.legacy.color.fmask.pitch_in_pixels - 1);
            fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);

            if (radv_image_is_tc_compat_cmask(image)) {
               va = gpu_address + image->bindings[0].offset + image->planes[0].surface.cmask_offset;

               fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
               fmask_state[7] |= va >> 8;
            }
         }
      } else
         memset(fmask_state, 0, 8 * 4);
   }
}

static void
radv_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
                             bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
                             const VkComponentMapping *mapping, unsigned first_level,
                             unsigned last_level, unsigned first_layer, unsigned last_layer,
                             unsigned width, unsigned height, unsigned depth, float min_lod,
                             uint32_t *state, uint32_t *fmask_state,
                             VkImageCreateFlags img_create_flags)
{
   if (device->physical_device->rad_info.gfx_level >= GFX10) {
      gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
                                    first_level, last_level, first_layer, last_layer, width, height,
                                    depth, min_lod, state, fmask_state, img_create_flags);
   } else {
      si_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
                                 first_level, last_level, first_layer, last_layer, width, height,
                                 depth, min_lod, state, fmask_state, img_create_flags);
   }
}

static void
radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image,
                           struct radeon_bo_metadata *md)
{
   static const VkComponentMapping fixedmapping;
   uint32_t desc[8];

   assert(image->plane_count == 1);

   radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->vk.image_type,
                                image->vk.format, &fixedmapping, 0, image->info.levels - 1, 0,
                                image->info.array_size - 1, image->info.width, image->info.height,
                                image->info.depth, 0.0f, desc, NULL, 0);

   si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0,
                                  0, image->planes[0].surface.blk_w, false, false, false, false,
                                  desc);

   ac_surface_get_umd_metadata(&device->physical_device->rad_info, &image->planes[0].surface,
                               image->info.levels, desc, &md->size_metadata, md->metadata);
}

void
radv_init_metadata(struct radv_device *device, struct radv_image *image,
                   struct radeon_bo_metadata *metadata)
{
   struct radeon_surf *surface = &image->planes[0].surface;

   memset(metadata, 0, sizeof(*metadata));

   if (device->physical_device->rad_info.gfx_level >= GFX9) {
      uint64_t dcc_offset =
         image->bindings[0].offset +
         (surface->display_dcc_offset ? surface->display_dcc_offset : surface->meta_offset);
      metadata->u.gfx9.swizzle_mode = surface->u.gfx9.swizzle_mode;
      metadata->u.gfx9.dcc_offset_256b = dcc_offset >> 8;
      metadata->u.gfx9.dcc_pitch_max = surface->u.gfx9.color.display_dcc_pitch_max;
      metadata->u.gfx9.dcc_independent_64b_blocks = surface->u.gfx9.color.dcc.independent_64B_blocks;
      metadata->u.gfx9.dcc_independent_128b_blocks = surface->u.gfx9.color.dcc.independent_128B_blocks;
      metadata->u.gfx9.dcc_max_compressed_block_size =
         surface->u.gfx9.color.dcc.max_compressed_block_size;
      metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
   } else {
      metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D
                                        ? RADEON_LAYOUT_TILED
                                        : RADEON_LAYOUT_LINEAR;
      metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D
                                        ? RADEON_LAYOUT_TILED
                                        : RADEON_LAYOUT_LINEAR;
      metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
      metadata->u.legacy.bankw = surface->u.legacy.bankw;
      metadata->u.legacy.bankh = surface->u.legacy.bankh;
      metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
      metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
      metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
      metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
      metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
   }
   radv_query_opaque_metadata(device, image, metadata);
}
1406
1407 void
1408 radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image,
1409 uint64_t offset, uint32_t stride)
1410 {
1411 ac_surface_override_offset_stride(&device->physical_device->rad_info, &image->planes[0].surface,
1412 image->info.levels, offset, stride);
1413 }
1414
1415 static void
1416 radv_image_alloc_single_sample_cmask(const struct radv_device *device,
1417 const struct radv_image *image, struct radeon_surf *surf)
1418 {
1419 if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 || image->info.levels > 1 ||
1420 image->info.depth > 1 || radv_image_has_dcc(image) ||
1421 !radv_image_use_fast_clear_for_image(device, image) ||
1422 (image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT))
1423 return;
1424
1425 assert(image->info.storage_samples == 1);
1426
1427 surf->cmask_offset = align64(surf->total_size, 1 << surf->cmask_alignment_log2);
1428 surf->total_size = surf->cmask_offset + surf->cmask_size;
1429 surf->alignment_log2 = MAX2(surf->alignment_log2, surf->cmask_alignment_log2);
1430 }
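
/* Worked example of the placement math above (hypothetical sizes, not taken
 * from real hardware): with surf->total_size = 100000 and
 * surf->cmask_alignment_log2 = 12 (4 KiB alignment),
 *
 *    surf->cmask_offset = align64(100000, 1 << 12) = 102400;
 *    surf->total_size   = 102400 + surf->cmask_size;
 *
 * i.e. the CMASK is appended after the color data, padded up to its own
 * alignment, and the surface alignment is raised to at least 4 KiB.
 */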
1431
1432 static void
1433 radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
1434 {
1435 /* Images with modifiers can potentially be imported. */
1436 if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
1437 return;
1438
1439 if (radv_image_has_cmask(image) || (radv_image_has_dcc(image) && !image->support_comp_to_single)) {
1440 image->fce_pred_offset = image->size;
1441 image->size += 8 * image->info.levels;
1442 }
1443
1444 if (radv_image_use_dcc_predication(device, image)) {
1445 image->dcc_pred_offset = image->size;
1446 image->size += 8 * image->info.levels;
1447 }
1448
1449 if ((radv_image_has_dcc(image) && !image->support_comp_to_single) ||
1450 radv_image_has_cmask(image) || radv_image_has_htile(image)) {
1451 image->clear_value_offset = image->size;
1452 image->size += 8 * image->info.levels;
1453 }
1454
1455 if (radv_image_is_tc_compat_htile(image) &&
1456 device->physical_device->rad_info.has_tc_compat_zrange_bug) {
1457 /* Metadata for the TC-compatible HTILE hardware bug which has
1458 * to be fixed by updating ZRANGE_PRECISION when doing
1459 * fast depth clears to 0.0f.
1460 */
1461 image->tc_compat_zrange_offset = image->size;
1462 image->size += image->info.levels * 4;
1463 }
1464 }
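
/* Illustrative layout of the values allocated above (hypothetical image with
 * CMASK, 3 mip levels, no DCC predication and no zrange workaround): the
 * metadata is appended after the surface data in image->size order, so
 *
 *    image->fce_pred_offset    = <end of surfaces>     (8 bytes * 3 levels)
 *    image->clear_value_offset = fce_pred_offset + 24  (8 bytes * 3 levels)
 *
 * Each slot is per-level, so the driver can track every mip independently.
 */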
1465
1466 /* Determine if the image is affected by the pipe misaligned metadata issue,
1467 * which requires invalidating L2.
1468 */
1469 static bool
1470 radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
1471 {
1472 struct radeon_info *rad_info = &device->physical_device->rad_info;
1473 int log2_samples = util_logbase2(image->info.samples);
1474
1475 assert(rad_info->gfx_level >= GFX10);
1476
1477 for (unsigned i = 0; i < image->plane_count; ++i) {
1478 VkFormat fmt = radv_image_get_plane_format(device->physical_device, image, i);
1479 int log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
1480 int log2_bpp_and_samples;
1481
1482 if (rad_info->gfx_level >= GFX10_3) {
1483 log2_bpp_and_samples = log2_bpp + log2_samples;
1484 } else {
1485 if (vk_format_has_depth(image->vk.format) && image->info.array_size >= 8) {
1486 log2_bpp = 2;
1487 }
1488
1489 log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
1490 }
1491
1492 int num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config);
1493 int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);
1494
1495 if (vk_format_has_depth(image->vk.format)) {
1496 if (radv_image_is_tc_compat_htile(image) && overlap) {
1497 return true;
1498 }
1499 } else {
1500 int max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config);
1501 int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
1502 int samples_overlap = MIN2(log2_samples, overlap);
1503
1504 /* TODO: This shouldn't be necessary if the image has DCC but
1505 * isn't readable by shaders.
1506 */
1507 if ((radv_image_has_dcc(image) || radv_image_is_tc_compat_cmask(image)) &&
1508 (samples_overlap > log2_samples_frag_diff)) {
1509 return true;
1510 }
1511 }
1512 }
1513
1514 return false;
1515 }
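
/* Worked example of the check above (hypothetical GFX10.0 configuration):
 * with a gb_addr_config NUM_PIPES field of 3 (log2-encoded, i.e. 8 pipes), a
 * 4 Bpp color plane (log2_bpp = 2) and 4 samples (log2_samples = 2):
 *
 *    log2_bpp_and_samples = MIN2(6, 2 + 2) = 4;
 *    overlap              = MAX2(0, 4 + 3 - 8) = 0;
 *
 * so samples_overlap = MIN2(2, 0) = 0 and the plane counts as pipe-aligned;
 * no extra L2 invalidation is required for it.
 */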
1516
1517 static bool
1518 radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
1519 {
1520 if (device->physical_device->rad_info.gfx_level >= GFX10) {
1521 return !device->physical_device->rad_info.tcc_rb_non_coherent &&
1522 !radv_image_is_pipe_misaligned(device, image);
1523 } else if (device->physical_device->rad_info.gfx_level == GFX9) {
1524 if (image->info.samples == 1 &&
1525 (image->vk.usage &
1526 (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
1527 !vk_format_has_stencil(image->vk.format)) {
1528 /* Single-sample color and single-sample depth
1529 * (not stencil) are coherent with shaders on
1530 * GFX9.
1531 */
1532 return true;
1533 }
1534 }
1535
1536 return false;
1537 }
1538
1539 /**
1540 * Determine if the given image can be fast cleared.
1541 */
1542 static bool
1543 radv_image_can_fast_clear(const struct radv_device *device, const struct radv_image *image)
1544 {
1545 if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
1546 return false;
1547
1548 if (vk_format_is_color(image->vk.format)) {
1549 if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
1550 return false;
1551
1552 /* RB+ doesn't work with CMASK fast clear on Stoney. */
1553 if (!radv_image_has_dcc(image) && device->physical_device->rad_info.family == CHIP_STONEY)
1554 return false;
1555 } else {
1556 if (!radv_image_has_htile(image))
1557 return false;
1558 }
1559
1560 /* Do not fast clear 3D images. */
1561 if (image->vk.image_type == VK_IMAGE_TYPE_3D)
1562 return false;
1563
1564 return true;
1565 }
1566
1567 /**
1568 * Determine if the given image can be fast cleared using comp-to-single.
1569 */
1570 static bool
1571 radv_image_use_comp_to_single(const struct radv_device *device, const struct radv_image *image)
1572 {
1573 /* comp-to-single is only available for GFX10+. */
1574 if (device->physical_device->rad_info.gfx_level < GFX10)
1575 return false;
1576
1577 /* If the image can't be fast cleared, comp-to-single can't be used. */
1578 if (!radv_image_can_fast_clear(device, image))
1579 return false;
1580
1581 /* If the image doesn't have DCC, it can't be fast cleared using comp-to-single. */
1582 if (!radv_image_has_dcc(image))
1583 return false;
1584
1585 /* It seems 8bpp and 16bpp require RB+ to work. */
1586 unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk.format);
1587 if (bytes_per_pixel <= 2 && !device->physical_device->rad_info.rbplus_allowed)
1588 return false;
1589
1590 return true;
1591 }
1592
1593 static unsigned
1594 radv_get_internal_plane_count(const struct radv_physical_device *pdev, VkFormat fmt)
1595 {
1596 if (pdev->emulate_etc2 && vk_format_description(fmt)->layout == UTIL_FORMAT_LAYOUT_ETC)
1597 return 2;
1598 return vk_format_get_plane_count(fmt);
1599 }
1600
1601 static void
1602 radv_image_reset_layout(const struct radv_physical_device *pdev, struct radv_image *image)
1603 {
1604 image->size = 0;
1605 image->alignment = 1;
1606
1607 image->tc_compatible_cmask = 0;
1608 image->fce_pred_offset = image->dcc_pred_offset = 0;
1609 image->clear_value_offset = image->tc_compat_zrange_offset = 0;
1610
1611 unsigned plane_count = radv_get_internal_plane_count(pdev, image->vk.format);
1612 for (unsigned i = 0; i < plane_count; ++i) {
1613 VkFormat format = radv_image_get_plane_format(pdev, image, i);
1614 if (vk_format_has_depth(format))
1615 format = vk_format_depth_only(format);
1616
1617 uint64_t flags = image->planes[i].surface.flags;
1618 uint64_t modifier = image->planes[i].surface.modifier;
1619 memset(image->planes + i, 0, sizeof(image->planes[i]));
1620
1621 image->planes[i].surface.flags = flags;
1622 image->planes[i].surface.modifier = modifier;
1623 image->planes[i].surface.blk_w = vk_format_get_blockwidth(format);
1624 image->planes[i].surface.blk_h = vk_format_get_blockheight(format);
1625 image->planes[i].surface.bpe = vk_format_get_blocksize(format);
1626
1627 /* Align bytes-per-element to a dword (3-byte formats are laid out as 4 bytes). */
1628 if (image->planes[i].surface.bpe == 3) {
1629 image->planes[i].surface.bpe = 4;
1630 }
1631 }
1632 }
1633
1634 VkResult
1635 radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
1636 const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
1637 struct radv_image *image)
1638 {
1639 /* Clear the pCreateInfo pointer so that any use of it in the delayed case is
1640 * also caught when we test the common internal case. */
1641 create_info.vk_info = NULL;
1642
1643 struct ac_surf_info image_info = image->info;
1644 VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
1645 if (result != VK_SUCCESS)
1646 return result;
1647
1648 assert(!mod_info || mod_info->drmFormatModifierPlaneCount >= image->plane_count);
1649
1650 radv_image_reset_layout(device->physical_device, image);
1651
1652 unsigned plane_count = radv_get_internal_plane_count(device->physical_device, image->vk.format);
1653 for (unsigned plane = 0; plane < plane_count; ++plane) {
1654 struct ac_surf_info info = image_info;
1655 uint64_t offset;
1656 unsigned stride;
1657
1658 info.width = vk_format_get_plane_width(image->vk.format, plane, info.width);
1659 info.height = vk_format_get_plane_height(image->vk.format, plane, info.height);
1660
1661 if (create_info.no_metadata_planes || plane_count > 1) {
1662 image->planes[plane].surface.flags |=
1663 RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE;
1664 }
1665
1666 device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);
1667
1668 if (plane == 0) {
1669 if (!radv_use_dcc_for_image_late(device, image))
1670 ac_surface_zero_dcc_fields(&image->planes[0].surface);
1671 }
1672
1673 if (create_info.bo_metadata && !mod_info &&
1674 !ac_surface_set_umd_metadata(&device->physical_device->rad_info,
1675 &image->planes[plane].surface, image_info.storage_samples,
1676 image_info.levels, create_info.bo_metadata->size_metadata,
1677 create_info.bo_metadata->metadata))
1678 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
1679
1680 if (!create_info.no_metadata_planes && !create_info.bo_metadata && plane_count == 1 &&
1681 !mod_info)
1682 radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface);
1683
1684 if (mod_info) {
1685 if (mod_info->pPlaneLayouts[plane].rowPitch % image->planes[plane].surface.bpe ||
1686 !mod_info->pPlaneLayouts[plane].rowPitch)
1687 return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
1688
1689 offset = mod_info->pPlaneLayouts[plane].offset;
1690 stride = mod_info->pPlaneLayouts[plane].rowPitch / image->planes[plane].surface.bpe;
1691 } else {
1692 offset = image->disjoint ? 0 :
1693 align64(image->size, 1 << image->planes[plane].surface.alignment_log2);
1694 stride = 0; /* 0 means no override */
1695 }
1696
1697 if (!ac_surface_override_offset_stride(&device->physical_device->rad_info,
1698 &image->planes[plane].surface, image->info.levels,
1699 offset, stride))
1700 return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
1701
1702 /* Validate DCC offsets in modifier layout. */
1703 if (plane_count == 1 && mod_info) {
1704 unsigned mem_planes = ac_surface_get_nplanes(&image->planes[plane].surface);
1705 if (mod_info->drmFormatModifierPlaneCount != mem_planes)
1706 return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
1707
1708 for (unsigned i = 1; i < mem_planes; ++i) {
1709 if (ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level,
1710 &image->planes[plane].surface, i,
1711 0) != mod_info->pPlaneLayouts[i].offset)
1712 return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
1713 }
1714 }
1715
1716 image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size);
1717 image->alignment = MAX2(image->alignment, 1 << image->planes[plane].surface.alignment_log2);
1718
1719 image->planes[plane].format =
1720 radv_image_get_plane_format(device->physical_device, image, plane);
1721 }
1722
1723 image->tc_compatible_cmask =
1724 radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image);
1725
1726 image->l2_coherent = radv_image_is_l2_coherent(device, image);
1727
1728 image->support_comp_to_single = radv_image_use_comp_to_single(device, image);
1729
1730 radv_image_alloc_values(device, image);
1731
1732 assert(image->planes[0].surface.surf_size);
1733 assert(image->planes[0].surface.modifier == DRM_FORMAT_MOD_INVALID ||
1734 ac_modifier_has_dcc(image->planes[0].surface.modifier) == radv_image_has_dcc(image));
1735 return VK_SUCCESS;
1736 }
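
/* Sketch of the resulting placement for a non-disjoint two-plane image
 * (hypothetical, no DRM modifier): plane 0 is laid out at offset 0, plane 1
 * at align64(plane0.total_size, 1 << plane1.alignment_log2), and
 * image->size / image->alignment end up as the running maximum over all
 * planes. With VK_IMAGE_CREATE_DISJOINT_BIT each plane instead starts at
 * offset 0 of its own memory binding.
 */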
1737
1738 static void
1739 radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
1740 struct radv_image *image)
1741 {
1742 if ((image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bindings[0].bo)
1743 device->ws->buffer_destroy(device->ws, image->bindings[0].bo);
1744
1745 if (image->owned_memory != VK_NULL_HANDLE) {
1746 RADV_FROM_HANDLE(radv_device_memory, mem, image->owned_memory);
1747 radv_free_memory(device, pAllocator, mem);
1748 }
1749
1750 vk_image_finish(&image->vk);
1751 vk_free2(&device->vk.alloc, pAllocator, image);
1752 }
1753
1754 static void
1755 radv_image_print_info(struct radv_device *device, struct radv_image *image)
1756 {
1757 fprintf(stderr, "Image:\n");
1758 fprintf(stderr,
1759 " Info: size=%" PRIu64 ", alignment=%" PRIu32 ", "
1760 "width=%" PRIu32 ", height=%" PRIu32 ", "
1761 "array_size=%" PRIu32 ", levels=%" PRIu32 "\n",
1762 image->size, image->alignment, image->info.width, image->info.height,
1763 image->info.array_size, image->info.levels);
1764 for (unsigned i = 0; i < image->plane_count; ++i) {
1765 const struct radv_image_plane *plane = &image->planes[i];
1766 const struct radeon_surf *surf = &plane->surface;
1767 const struct util_format_description *desc = vk_format_description(plane->format);
1768 uint64_t offset = ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level,
1769 &plane->surface, 0, 0);
1770
1771 fprintf(stderr, " Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset);
1772
1773 ac_surface_print_info(stderr, &device->physical_device->rad_info, surf);
1774 }
1775 }
1776
1777 static uint64_t
1778 radv_select_modifier(const struct radv_device *dev, VkFormat format,
1779 const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list)
1780 {
1781 const struct radv_physical_device *pdev = dev->physical_device;
1782 unsigned mod_count;
1783
1784 assert(mod_list->drmFormatModifierCount);
1785
1786 /* We can allow everything here as it does not affect order and the application
1787 * is only allowed to specify modifiers that we support. */
1788 const struct ac_modifier_options modifier_options = {
1789 .dcc = true,
1790 .dcc_retile = true,
1791 };
1792
1793 ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
1794 &mod_count, NULL);
1795
1796 uint64_t *mods = calloc(mod_count, sizeof(*mods));
1797
1798 /* If allocation fails, fall back to the application's first modifier. */
1799 if (!mods)
1800 return mod_list->pDrmFormatModifiers[0];
1801
1802 ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
1803 &mod_count, mods);
1804
1805 for (unsigned i = 0; i < mod_count; ++i) {
1806 for (uint32_t j = 0; j < mod_list->drmFormatModifierCount; ++j) {
1807 if (mods[i] == mod_list->pDrmFormatModifiers[j]) {
1808 free(mods);
1809 return mod_list->pDrmFormatModifiers[j];
1810 }
1811 }
1812 }
1813 unreachable("App specified an invalid modifier");
1814 }
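
/* Illustrative outcome (hypothetical modifier lists): if the driver's
 * preference order from ac_get_supported_modifiers() is { M0, M1, M2 } and
 * the application passes { M2, M0 }, the nested loop above returns M0: the
 * outer loop walks the driver's order, so the driver's preference wins over
 * the order of the application's list.
 */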
1815
1816 VkResult
1817 radv_image_create(VkDevice _device, const struct radv_image_create_info *create_info,
1818 const VkAllocationCallbacks *alloc, VkImage *pImage)
1819 {
1820 RADV_FROM_HANDLE(radv_device, device, _device);
1821 const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
1822 uint64_t modifier = DRM_FORMAT_MOD_INVALID;
1823 struct radv_image *image = NULL;
1824 VkFormat format = radv_select_android_external_format(pCreateInfo->pNext, pCreateInfo->format);
1825 const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list =
1826 vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
1827 const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod =
1828 vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT);
1829 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
1830
1831 unsigned plane_count = radv_get_internal_plane_count(device->physical_device, format);
1832
1833 const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;
1834
1835 radv_assert(pCreateInfo->mipLevels > 0);
1836 radv_assert(pCreateInfo->arrayLayers > 0);
1837 radv_assert(pCreateInfo->samples > 0);
1838 radv_assert(pCreateInfo->extent.width > 0);
1839 radv_assert(pCreateInfo->extent.height > 0);
1840 radv_assert(pCreateInfo->extent.depth > 0);
1841
1842 image =
1843 vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1844 if (!image)
1845 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1846
1847 vk_image_init(&device->vk, &image->vk, pCreateInfo);
1848
1849 image->info.width = pCreateInfo->extent.width;
1850 image->info.height = pCreateInfo->extent.height;
1851 image->info.depth = pCreateInfo->extent.depth;
1852 image->info.samples = pCreateInfo->samples;
1853 image->info.storage_samples = pCreateInfo->samples;
1854 image->info.array_size = pCreateInfo->arrayLayers;
1855 image->info.levels = pCreateInfo->mipLevels;
1856 image->info.num_channels = vk_format_get_nr_components(format);
1857
1858 image->plane_count = vk_format_get_plane_count(format);
1859 image->disjoint = image->plane_count > 1 && pCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT;
1860
1861 image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
1862 if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
1863 for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
1864 if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
1865 pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
1866 image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1867 else
1868 image->queue_family_mask |= 1u << vk_queue_to_radv(device->physical_device,
1869 pCreateInfo->pQueueFamilyIndices[i]);
1870 }
1871
1872 const VkExternalMemoryImageCreateInfo *external_info =
1873 vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO);
1874
1875 image->shareable = external_info;
1876 if (!vk_format_is_depth_or_stencil(format) && !image->shareable &&
1877 !(image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_ALIASED_BIT) &&
1878 pCreateInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
1879 image->info.surf_index = &device->image_mrt_offset_counter;
1880 }
1881
1882 if (mod_list)
1883 modifier = radv_select_modifier(device, format, mod_list);
1884 else if (explicit_mod)
1885 modifier = explicit_mod->drmFormatModifier;
1886
1887 for (unsigned plane = 0; plane < plane_count; ++plane) {
1888 image->planes[plane].surface.flags =
1889 radv_get_surface_flags(device, image, plane, pCreateInfo, format);
1890 image->planes[plane].surface.modifier = modifier;
1891 }
1892
1893 bool delay_layout =
1894 external_info && (external_info->handleTypes &
1895 VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);
1896
1897 if (delay_layout) {
1898 *pImage = radv_image_to_handle(image);
1899 assert(!(image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
1900 return VK_SUCCESS;
1901 }
1902
1903 VkResult result = radv_image_create_layout(device, *create_info, explicit_mod, image);
1904 if (result != VK_SUCCESS) {
1905 radv_destroy_image(device, alloc, image);
1906 return result;
1907 }
1908
1909 if (image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
1910 image->alignment = MAX2(image->alignment, 4096);
1911 image->size = align64(image->size, image->alignment);
1912 image->bindings[0].offset = 0;
1913
1914 result =
1915 device->ws->buffer_create(device->ws, image->size, image->alignment, 0,
1916 RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, 0,
1917 &image->bindings[0].bo);
1918 if (result != VK_SUCCESS) {
1919 radv_destroy_image(device, alloc, image);
1920 return vk_error(device, result);
1921 }
1922 }
1923
1924 if (device->instance->debug_flags & RADV_DEBUG_IMG) {
1925 radv_image_print_info(device, image);
1926 }
1927
1928 *pImage = radv_image_to_handle(image);
1929
1930 return VK_SUCCESS;
1931 }
1932
1933 static void
1934 radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_device *device,
1935 VkFormat vk_format, const VkComponentMapping *components,
1936 float min_lod,
1937 bool is_storage_image, bool disable_compression,
1938 bool enable_compression, unsigned plane_id,
1939 unsigned descriptor_plane_id, VkImageCreateFlags img_create_flags)
1940 {
1941 struct radv_image *image = iview->image;
1942 struct radv_image_plane *plane = &image->planes[plane_id];
1943 bool is_stencil = iview->vk.aspects == VK_IMAGE_ASPECT_STENCIL_BIT;
1944 uint32_t blk_w;
1945 union radv_descriptor *descriptor;
1946 uint32_t hw_level = 0;
1947
1948 if (is_storage_image) {
1949 descriptor = &iview->storage_descriptor;
1950 } else {
1951 descriptor = &iview->descriptor;
1952 }
1953
1954 assert(vk_format_get_plane_count(vk_format) == 1);
1955 assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
1956 blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) *
1957 vk_format_get_blockwidth(vk_format);
1958
1959 if (device->physical_device->rad_info.gfx_level >= GFX9)
1960 hw_level = iview->vk.base_mip_level;
1961 radv_make_texture_descriptor(
1962 device, image, is_storage_image, iview->vk.view_type, vk_format, components, hw_level,
1963 hw_level + iview->vk.level_count - 1, iview->vk.base_array_layer,
1964 iview->vk.base_array_layer + iview->vk.layer_count - 1,
1965 vk_format_get_plane_width(image->vk.format, plane_id, iview->extent.width),
1966 vk_format_get_plane_height(image->vk.format, plane_id, iview->extent.height),
1967 iview->extent.depth, min_lod, descriptor->plane_descriptors[descriptor_plane_id],
1968 descriptor_plane_id || is_storage_image ? NULL : descriptor->fmask_descriptor,
1969 img_create_flags);
1970
1971 const struct legacy_surf_level *base_level_info = NULL;
1972 if (device->physical_device->rad_info.gfx_level <= GFX9) {
1973 if (is_stencil)
1974 base_level_info = &plane->surface.u.legacy.zs.stencil_level[iview->vk.base_mip_level];
1975 else
1976 base_level_info = &plane->surface.u.legacy.level[iview->vk.base_mip_level];
1977 }
1978
1979 bool enable_write_compression = radv_image_use_dcc_image_stores(device, image);
1980 if (is_storage_image && !(enable_write_compression || enable_compression))
1981 disable_compression = true;
1982 si_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, iview->vk.base_mip_level,
1983 iview->vk.base_mip_level, blk_w, is_stencil, is_storage_image,
1984 disable_compression, enable_write_compression,
1985 descriptor->plane_descriptors[descriptor_plane_id]);
1986 }
1987
1988 static unsigned
1989 radv_plane_from_aspect(VkImageAspectFlags mask)
1990 {
1991 switch (mask) {
1992 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1993 case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
1994 return 1;
1995 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1996 case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
1997 return 2;
1998 case VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT:
1999 return 3;
2000 default:
2001 return 0;
2002 }
2003 }
2004
2005 VkFormat
2006 radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
2007 {
2008 switch (mask) {
2009 case VK_IMAGE_ASPECT_PLANE_0_BIT:
2010 return image->planes[0].format;
2011 case VK_IMAGE_ASPECT_PLANE_1_BIT:
2012 return image->planes[1].format;
2013 case VK_IMAGE_ASPECT_PLANE_2_BIT:
2014 return image->planes[2].format;
2015 case VK_IMAGE_ASPECT_STENCIL_BIT:
2016 return vk_format_stencil_only(image->vk.format);
2017 case VK_IMAGE_ASPECT_DEPTH_BIT:
2018 return vk_format_depth_only(image->vk.format);
2019 case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
2020 return vk_format_depth_only(image->vk.format);
2021 default:
2022 return image->vk.format;
2023 }
2024 }
2025
2026 /**
2027 * Determine if the given image view can be fast cleared.
2028 */
2029 static bool
2030 radv_image_view_can_fast_clear(const struct radv_device *device,
2031 const struct radv_image_view *iview)
2032 {
2033 struct radv_image *image;
2034
2035 if (!iview)
2036 return false;
2037 image = iview->image;
2038
2039 /* Only fast clear if the image itself can be fast cleared. */
2040 if (!radv_image_can_fast_clear(device, image))
2041 return false;
2042
2043 /* Only fast clear if all layers are bound. */
2044 if (iview->vk.base_array_layer > 0 || iview->vk.layer_count != image->info.array_size)
2045 return false;
2046
2047 /* Only fast clear if the view covers the whole image. */
2048 if (!radv_image_extent_compare(image, &iview->extent))
2049 return false;
2050
2051 return true;
2052 }
2053
2054 void
2055 radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
2056 const VkImageViewCreateInfo *pCreateInfo,
2057 VkImageCreateFlags img_create_flags,
2058 const struct radv_image_view_extra_create_info *extra_create_info)
2059 {
2060 RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
2061 const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
2062 uint32_t plane_count = 1;
2063 float min_lod = 0.0f;
2064
2065 const struct VkImageViewMinLodCreateInfoEXT *min_lod_info =
2066 vk_find_struct_const(pCreateInfo->pNext, IMAGE_VIEW_MIN_LOD_CREATE_INFO_EXT);
2067
2068 if (min_lod_info)
2069 min_lod = min_lod_info->minLod;
2070
2071 bool from_client = extra_create_info && extra_create_info->from_client;
2072 vk_image_view_init(&device->vk, &iview->vk, !from_client, pCreateInfo);
2073
2074 switch (image->vk.image_type) {
2075 case VK_IMAGE_TYPE_1D:
2076 case VK_IMAGE_TYPE_2D:
2077 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <=
2078 image->info.array_size);
2079 break;
2080 case VK_IMAGE_TYPE_3D:
2081 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <=
2082 radv_minify(image->info.depth, range->baseMipLevel));
2083 break;
2084 default:
2085 unreachable("bad VkImageType");
2086 }
2087 iview->image = image;
2088 iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
2089
2090 /* If the image has an Android external format, pCreateInfo->format will be
2091 * VK_FORMAT_UNDEFINED. */
2092 if (iview->vk.format == VK_FORMAT_UNDEFINED) {
2093 iview->vk.format = image->vk.format;
2094 iview->vk.view_format = image->vk.format;
2095 }
2096
2097 /* Split out the right aspect. Note that for internal meta code we sometimes
2098 * use an equivalent color format for the aspect so we first have to check
2099 * if we actually got depth/stencil formats. */
2100 if (iview->vk.aspects == VK_IMAGE_ASPECT_STENCIL_BIT) {
2101 if (vk_format_has_stencil(iview->vk.view_format))
2102 iview->vk.view_format = vk_format_stencil_only(iview->vk.view_format);
2103 } else if (iview->vk.aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
2104 if (vk_format_has_depth(iview->vk.view_format))
2105 iview->vk.view_format = vk_format_depth_only(iview->vk.view_format);
2106 }
2107
2108 if (vk_format_get_plane_count(image->vk.format) > 1 &&
2109 pCreateInfo->subresourceRange.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
2110 plane_count = vk_format_get_plane_count(iview->vk.format);
2111 }
2112
2113 if (device->physical_device->emulate_etc2 &&
2114 vk_format_description(image->vk.format)->layout == UTIL_FORMAT_LAYOUT_ETC) {
2115 const struct util_format_description *desc = vk_format_description(iview->vk.format);
2116 if (desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
2117 iview->plane_id = 1;
2118 iview->vk.format = etc2_emulation_format(iview->vk.format);
2119 iview->vk.view_format = etc2_emulation_format(iview->vk.format);
2120 }
2121
2122 plane_count = 1;
2123 }
2124
2125 if (device->physical_device->rad_info.gfx_level >= GFX9) {
2126 iview->extent = (VkExtent3D){
2127 .width = image->info.width,
2128 .height = image->info.height,
2129 .depth = image->info.depth,
2130 };
2131 } else {
2132 iview->extent = iview->vk.extent;
2133 }
2134
2135 if (iview->vk.format != image->planes[iview->plane_id].format) {
2136 unsigned view_bw = vk_format_get_blockwidth(iview->vk.format);
2137 unsigned view_bh = vk_format_get_blockheight(iview->vk.format);
2138 unsigned img_bw = vk_format_get_blockwidth(image->planes[iview->plane_id].format);
2139 unsigned img_bh = vk_format_get_blockheight(image->planes[iview->plane_id].format);
2140
2141 iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
2142 iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);
2143
2144 /* Comment ported from amdvlk -
2145 * If we have the following image:
2146 * Uncompressed pixels Compressed block sizes (4x4)
2147 * mip0: 22 x 22 6 x 6
2148 * mip1: 11 x 11 3 x 3
2149 * mip2: 5 x 5 2 x 2
2150 * mip3: 2 x 2 1 x 1
2151 * mip4: 1 x 1 1 x 1
2152 *
2153 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and
2154 * the HW is calculating the degradation of the block sizes down the mip-chain as follows
2155 * (straight-up divide-by-two integer math): mip0: 6x6 mip1: 3x3 mip2: 1x1 mip3: 1x1
2156 *
2157 * This means that mip2 will be missing texels.
2158 *
2159 * Fix this by calculating the base mip's width and height, then convert
2160 * that, and round it back up to get the level 0 size. Clamp the
2161 * converted size between the original values, and the physical extent
2162 * of the base mipmap.
2163 *
2164 * On GFX10 we have to take care to not go over the physical extent
2165 * of the base mipmap as otherwise the GPU computes a different layout.
2166 * Note that the GPU does use the same base-mip dimensions for both a
2167 * block compatible format and the compressed format, so even if we take
2168 * the plain converted dimensions the physical layout is correct.
2169 */
2170 if (device->physical_device->rad_info.gfx_level >= GFX9 &&
2171 vk_format_is_compressed(image->vk.format) && !vk_format_is_compressed(iview->vk.format)) {
2172 /* If we have multiple levels in the view we should ideally take the last level,
2173 * but the mip calculation has a max(..., 1) so walking back to the base mip in a
2174 * useful way is hard. */
2175 if (iview->vk.level_count > 1) {
2176 iview->extent.width = iview->image->planes[0].surface.u.gfx9.base_mip_width;
2177 iview->extent.height = iview->image->planes[0].surface.u.gfx9.base_mip_height;
2178 } else {
2179 unsigned lvl_width = radv_minify(image->info.width, range->baseMipLevel);
2180 unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);
2181
2182 lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
2183 lvl_height = round_up_u32(lvl_height * view_bh, img_bh);
2184
2185 lvl_width <<= range->baseMipLevel;
2186 lvl_height <<= range->baseMipLevel;
2187
2188 iview->extent.width = CLAMP(lvl_width, iview->extent.width,
2189 iview->image->planes[0].surface.u.gfx9.base_mip_width);
2190 iview->extent.height = CLAMP(lvl_height, iview->extent.height,
2191 iview->image->planes[0].surface.u.gfx9.base_mip_height);
2192 }
2193 }
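
/* Worked example of the single-level path above (hypothetical 22x22
 * BC-compressed image viewed at baseMipLevel = 2 with an uncompressed
 * block-compatible format, view_bw = 1, img_bw = 4):
 *
 *    lvl_width = radv_minify(22, 2) = 5;   // texels at mip 2
 *    lvl_width = round_up_u32(5, 4) = 2;   // compressed blocks
 *    lvl_width <<= 2;                      // back to a level-0 size: 8
 *
 * which is then clamped between the converted view width (6) and the
 * physical base mip width of the surface.
 */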
2194 }
2195
2196 iview->support_fast_clear = radv_image_view_can_fast_clear(device, iview);
2197 iview->disable_dcc_mrt = extra_create_info ? extra_create_info->disable_dcc_mrt : false;
2198
2199 bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false;
2200 bool enable_compression = extra_create_info ? extra_create_info->enable_compression : false;
2201 for (unsigned i = 0; i < plane_count; ++i) {
2202 VkFormat format = vk_format_get_plane_format(iview->vk.view_format, i);
2203 radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, min_lod, false,
2204 disable_compression, enable_compression, iview->plane_id + i,
2205 i, img_create_flags);
2206 radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, min_lod, true,
2207 disable_compression, enable_compression, iview->plane_id + i,
2208 i, img_create_flags);
2209 }
2210 }
2211
2212 void
2213 radv_image_view_finish(struct radv_image_view *iview)
2214 {
2215 vk_image_view_finish(&iview->vk);
2216 }
2217
2218 bool
2219 radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image,
2220 VkImageLayout layout, bool in_render_loop, unsigned queue_mask)
2221 {
2222 switch (layout) {
2223 case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
2224 case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL:
2225 case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL:
2226 case VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL:
2227 return radv_image_has_htile(image);
2228 case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
2229 return radv_image_is_tc_compat_htile(image) ||
2230 (radv_image_has_htile(image) && queue_mask == (1u << RADV_QUEUE_GENERAL));
2231 case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR:
2232 case VK_IMAGE_LAYOUT_GENERAL:
2233 /* It should be safe to enable TC-compat HTILE with
2234 * VK_IMAGE_LAYOUT_GENERAL if we are not in a render loop and
2235 * if the image doesn't have the storage bit set. This
2236 * improves performance for apps that use GENERAL for the main
2237 * depth pass because this allows compression and this reduces
2238 * the number of decompressions from/to GENERAL.
2239 */
2240 /* FIXME: Enabling TC-compat HTILE in GENERAL on the compute
2241 * queue is likely broken for eg. depth/stencil copies.
2242 */
2243 if (radv_image_is_tc_compat_htile(image) && queue_mask & (1u << RADV_QUEUE_GENERAL) &&
2244 !in_render_loop && !device->instance->disable_tc_compat_htile_in_general) {
2245 return true;
2246 } else {
2247 return false;
2248 }
2249 case VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT:
2250 /* Do not compress HTILE with feedback loops because we can't read&write it without
2251 * introducing corruption.
2252 */
2253 return false;
2254 case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
2255 if (radv_image_is_tc_compat_htile(image) ||
2256 (radv_image_has_htile(image) &&
2257 !(image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
2258 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))) {
2259 /* Keep HTILE compressed if the image is only going to
2260 * be used as a depth/stencil read-only attachment.
2261 */
2262 return true;
2263 } else {
2264 return false;
2265 }
2266 break;
2267 default:
2268 return radv_image_is_tc_compat_htile(image);
2269 }
2270 }
2271
2272 bool
2273 radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image,
2274 unsigned level, VkImageLayout layout, bool in_render_loop,
2275 unsigned queue_mask)
2276 {
2277 if (radv_dcc_enabled(image, level) &&
2278 !radv_layout_dcc_compressed(device, image, level, layout, in_render_loop, queue_mask))
2279 return false;
2280
2281 if (!(image->vk.usage & RADV_IMAGE_USAGE_WRITE_BITS))
2282 return false;
2283
2284 if (layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
2285 layout != VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL)
2286 return false;
2287
2288 /* Exclusive images with CMASK or DCC can always be fast-cleared on the gfx queue. Concurrent
2289 * images can only be fast-cleared if comp-to-single is supported because we don't yet support
2290 * FCE on the compute queue.
2291 */
2292 return queue_mask == (1u << RADV_QUEUE_GENERAL) || radv_image_use_comp_to_single(device, image);
2293 }
2294
2295 bool
2296 radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image,
2297 unsigned level, VkImageLayout layout, bool in_render_loop,
2298 unsigned queue_mask)
2299 {
2300 if (!radv_dcc_enabled(image, level))
2301 return false;
2302
2303 if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT &&
2304 queue_mask & (1u << RADV_QUEUE_FOREIGN))
2305 return true;
2306
2307 /* If the image is read-only, we can always just keep it compressed */
2308 if (!(image->vk.usage & RADV_IMAGE_USAGE_WRITE_BITS))
2309 return true;
2310
2311 /* Don't compress compute transfer dst when image stores are not supported. */
2312 if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) &&
2313 (queue_mask & (1u << RADV_QUEUE_COMPUTE)) && !radv_image_use_dcc_image_stores(device, image))
2314 return false;
2315
2316 if (layout == VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT) {
2317 /* Do not compress DCC with feedback loops because we can't read&write it without introducing
2318 * corruption.
2319 */
2320 return false;
2321 }
2322
2323 return device->physical_device->rad_info.gfx_level >= GFX10 || layout != VK_IMAGE_LAYOUT_GENERAL;
2324 }
2325
2326 bool
2327 radv_layout_fmask_compressed(const struct radv_device *device, const struct radv_image *image,
2328 VkImageLayout layout, unsigned queue_mask)
2329 {
2330 if (!radv_image_has_fmask(image))
2331 return false;
2332
2333 /* Don't compress compute transfer dst because image stores ignore FMASK and it needs to be
2334 * expanded beforehand.
2335 */
2336 if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) &&
2337 (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
2338 return false;
2339
2340 /* Only compress concurrent images if TC-compat CMASK is enabled (no FMASK decompression). */
2341 return layout != VK_IMAGE_LAYOUT_GENERAL &&
2342 (queue_mask == (1u << RADV_QUEUE_GENERAL) || radv_image_is_tc_compat_cmask(image));
2343 }
2344
2345 unsigned
2346 radv_image_queue_family_mask(const struct radv_image *image,
2347 enum radv_queue_family family,
2348 enum radv_queue_family queue_family)
2349 {
2350 if (!image->exclusive)
2351 return image->queue_family_mask;
2352 if (family == RADV_QUEUE_FOREIGN)
2353 return ((1u << RADV_MAX_QUEUE_FAMILIES) - 1u) | (1u << RADV_QUEUE_FOREIGN);
2354 if (family == RADV_QUEUE_IGNORED)
2355 return 1u << queue_family;
2356 return 1u << family;
2357 }
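
/* Example of the mapping above (illustrative): for an exclusive image, a
 * barrier that leaves ownership alone (family == RADV_QUEUE_IGNORED) yields
 * 1u << queue_family, i.e. only the queue family currently using the image,
 * while RADV_QUEUE_FOREIGN widens the mask to every family including the
 * foreign one.
 */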
2358
2359 VKAPI_ATTR VkResult VKAPI_CALL
2360 radv_CreateImage(VkDevice device, const VkImageCreateInfo *pCreateInfo,
2361 const VkAllocationCallbacks *pAllocator, VkImage *pImage)
2362 {
2363 #ifdef ANDROID
2364 const VkNativeBufferANDROID *gralloc_info =
2365 vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
2366
2367 if (gralloc_info)
2368 return radv_image_from_gralloc(device, pCreateInfo, gralloc_info, pAllocator, pImage);
2369 #endif
2370
2371 const struct wsi_image_create_info *wsi_info =
2372 vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
2373 bool scanout = wsi_info && wsi_info->scanout;
2374 bool prime_blit_src = wsi_info && wsi_info->buffer_blit_src;
2375
2376 return radv_image_create(device,
2377 &(struct radv_image_create_info){
2378 .vk_info = pCreateInfo,
2379 .scanout = scanout,
2380 .prime_blit_src = prime_blit_src,
2381 },
2382 pAllocator, pImage);
2383 }
2384
2385 VKAPI_ATTR void VKAPI_CALL
2386 radv_DestroyImage(VkDevice _device, VkImage _image, const VkAllocationCallbacks *pAllocator)
2387 {
2388 RADV_FROM_HANDLE(radv_device, device, _device);
2389 RADV_FROM_HANDLE(radv_image, image, _image);
2390
2391 if (!image)
2392 return;
2393
2394 radv_destroy_image(device, pAllocator, image);
2395 }
2396
2397 VKAPI_ATTR void VKAPI_CALL
2398 radv_GetImageSubresourceLayout(VkDevice _device, VkImage _image,
2399 const VkImageSubresource *pSubresource, VkSubresourceLayout *pLayout)
2400 {
2401 RADV_FROM_HANDLE(radv_image, image, _image);
2402 RADV_FROM_HANDLE(radv_device, device, _device);
2403 int level = pSubresource->mipLevel;
2404 int layer = pSubresource->arrayLayer;
2405
2406 unsigned plane_id = 0;
2407 if (vk_format_get_plane_count(image->vk.format) > 1)
2408 plane_id = radv_plane_from_aspect(pSubresource->aspectMask);
2409
2410 struct radv_image_plane *plane = &image->planes[plane_id];
2411 struct radeon_surf *surface = &plane->surface;
2412
2413 if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
2414 unsigned mem_plane_id = radv_plane_from_aspect(pSubresource->aspectMask);
2415
2416 assert(level == 0);
2417 assert(layer == 0);
2418
2419 pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level,
2420 surface, mem_plane_id, 0);
2421 pLayout->rowPitch = ac_surface_get_plane_stride(device->physical_device->rad_info.gfx_level,
2422 surface, mem_plane_id, level);
2423 pLayout->arrayPitch = 0;
2424 pLayout->depthPitch = 0;
2425 pLayout->size = ac_surface_get_plane_size(surface, mem_plane_id);
2426 } else if (device->physical_device->rad_info.gfx_level >= GFX9) {
2427 uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;
2428
2429 pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level,
2430 &plane->surface, 0, layer) +
2431 level_offset;
2432 if (image->vk.format == VK_FORMAT_R32G32B32_UINT ||
2433 image->vk.format == VK_FORMAT_R32G32B32_SINT ||
2434 image->vk.format == VK_FORMAT_R32G32B32_SFLOAT) {
2435 /* Adjust the number of bytes between each row because
2436 * the pitch is actually the number of components per
2437 * row.
2438 */
2439 pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
2440 } else {
2441 uint32_t pitch =
2442 surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;
2443
2444 assert(util_is_power_of_two_nonzero(surface->bpe));
2445 pLayout->rowPitch = pitch * surface->bpe;
2446 }
2447
2448 pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
2449 pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
2450 pLayout->size = surface->u.gfx9.surf_slice_size;
2451 if (image->vk.image_type == VK_IMAGE_TYPE_3D)
2452 pLayout->size *= u_minify(image->info.depth, level);
2453 } else {
2454 pLayout->offset = (uint64_t)surface->u.legacy.level[level].offset_256B * 256 +
2455 (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
2456 pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
2457 pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
2458 pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
2459 pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
2460 if (image->vk.image_type == VK_IMAGE_TYPE_3D)
2461 pLayout->size *= u_minify(image->info.depth, level);
2462 }
2463 }
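
/* Illustrative API usage (not driver code): querying the layout of a linear
 * image so the host can address texels directly. Everything here is standard
 * Vulkan; the 4-byte texel size is an assumption for the example.
 *
 *    VkImageSubresource subres = {
 *       .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
 *       .mipLevel = 0,
 *       .arrayLayer = 0,
 *    };
 *    VkSubresourceLayout layout;
 *    vkGetImageSubresourceLayout(device, image, &subres, &layout);
 *    uint8_t *texel = base + layout.offset + y * layout.rowPitch + x * 4;
 */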
2464
2465 VKAPI_ATTR VkResult VKAPI_CALL
2466 radv_GetImageDrmFormatModifierPropertiesEXT(VkDevice _device, VkImage _image,
2467 VkImageDrmFormatModifierPropertiesEXT *pProperties)
2468 {
2469 RADV_FROM_HANDLE(radv_image, image, _image);
2470
2471 pProperties->drmFormatModifier = image->planes[0].surface.modifier;
2472 return VK_SUCCESS;
2473 }
2474
2475 VKAPI_ATTR VkResult VKAPI_CALL
2476 radv_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo,
2477 const VkAllocationCallbacks *pAllocator, VkImageView *pView)
2478 {
2479 RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
2480 RADV_FROM_HANDLE(radv_device, device, _device);
2481 struct radv_image_view *view;
2482
2483 view =
2484 vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2485 if (view == NULL)
2486 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2487
2488 radv_image_view_init(view, device, pCreateInfo, image->vk.create_flags,
2489 &(struct radv_image_view_extra_create_info){.from_client = true});
2490
2491 *pView = radv_image_view_to_handle(view);
2492
2493 return VK_SUCCESS;
2494 }
2495
2496 VKAPI_ATTR void VKAPI_CALL
2497 radv_DestroyImageView(VkDevice _device, VkImageView _iview, const VkAllocationCallbacks *pAllocator)
2498 {
2499 RADV_FROM_HANDLE(radv_device, device, _device);
2500 RADV_FROM_HANDLE(radv_image_view, iview, _iview);
2501
2502 if (!iview)
2503 return;
2504
2505 radv_image_view_finish(iview);
2506 vk_free2(&device->vk.alloc, pAllocator, iview);
2507 }
2508
2509 void
2510 radv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device,
2511 const VkBufferViewCreateInfo *pCreateInfo)
2512 {
2513 RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
2514
2515 vk_object_base_init(&device->vk, &view->base, VK_OBJECT_TYPE_BUFFER_VIEW);
2516
2517 view->bo = buffer->bo;
2518 view->range = vk_buffer_range(&buffer->vk, pCreateInfo->offset, pCreateInfo->range);
2519 view->vk_format = pCreateInfo->format;
2520
2521 radv_make_buffer_descriptor(device, buffer, view->vk_format, pCreateInfo->offset, view->range,
2522 view->state);
2523 }
2524
2525 void
2526 radv_buffer_view_finish(struct radv_buffer_view *view)
2527 {
2528 vk_object_base_finish(&view->base);
2529 }
2530
2531 VKAPI_ATTR VkResult VKAPI_CALL
2532 radv_CreateBufferView(VkDevice _device, const VkBufferViewCreateInfo *pCreateInfo,
2533 const VkAllocationCallbacks *pAllocator, VkBufferView *pView)
2534 {
2535 RADV_FROM_HANDLE(radv_device, device, _device);
2536 struct radv_buffer_view *view;
2537
2538 view =
2539 vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2540 if (!view)
2541 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2542
2543 radv_buffer_view_init(view, device, pCreateInfo);
2544
2545 *pView = radv_buffer_view_to_handle(view);
2546
2547 return VK_SUCCESS;
2548 }
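
/* Illustrative API usage (not driver code): creating a texel buffer view.
 * All names are standard Vulkan; the buffer and device handles are assumed
 * to exist already.
 *
 *    VkBufferViewCreateInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
 *       .buffer = buffer,
 *       .format = VK_FORMAT_R32G32B32A32_SFLOAT,
 *       .offset = 0,
 *       .range = VK_WHOLE_SIZE,
 *    };
 *    VkBufferView view;
 *    vkCreateBufferView(device, &info, NULL, &view);
 */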
2549
2550 VKAPI_ATTR void VKAPI_CALL
2551 radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
2552 const VkAllocationCallbacks *pAllocator)
2553 {
2554 RADV_FROM_HANDLE(radv_device, device, _device);
2555 RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);
2556
2557 if (!view)
2558 return;
2559
2560 radv_buffer_view_finish(view);
2561 vk_free2(&device->vk.alloc, pAllocator, view);
2562 }
2563