/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "ac_drm_fourcc.h"
#include "util/debug.h"
#include "util/u_atomic.h"
#include "vulkan/util/vk_format.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_radeon_winsys.h"
#include "sid.h"
#include "vk_format.h"
#include "vk_util.h"

#include "gfx10_format_table.h"

static const VkImageUsageFlagBits RADV_IMAGE_USAGE_WRITE_BITS =
   VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
   VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT;

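/* Select the surface tiling mode (linear vs. 2D tiled) for a new image from
 * the create info and format. */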
static unsigned
radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo,
                   VkFormat format)
{
   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
      assert(pCreateInfo->samples <= 1);
      return RADEON_SURF_MODE_LINEAR_ALIGNED;
   }

   /* MSAA resources must be 2D tiled. */
   if (pCreateInfo->samples > 1)
      return RADEON_SURF_MODE_2D;

   if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) &&
       device->physical_device->rad_info.chip_class <= GFX8) {
      /* this causes hangs in some VK CTS tests on GFX9. */
      /* Textures with a very small height are recommended to be linear. */
      if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
          /* Only very thin and long 2D textures should benefit from
           * linear_aligned. */
          (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
         return RADEON_SURF_MODE_LINEAR_ALIGNED;
   }

   return RADEON_SURF_MODE_2D;
}

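/* Whether the image can use TC-compatible HTILE, i.e. depth/stencil metadata
 * that the texture units can consume directly without a decompress pass. */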
static bool
radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo,
                                   VkFormat format)
{
   /* TC-compat HTILE is only available for GFX8+. */
   if (device->physical_device->rad_info.chip_class < GFX8)
      return false;

   if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
      return false;

   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* Do not enable TC-compatible HTILE if the image isn't readable by a
    * shader because no texture fetches will happen.
    */
   if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
                               VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
      return false;

   if (device->physical_device->rad_info.chip_class < GFX9) {
      /* TC-compat HTILE for MSAA depth/stencil images is broken
       * on GFX8 because the tiling doesn't match.
       */
      if (pCreateInfo->samples >= 2 && format == VK_FORMAT_D32_SFLOAT_S8_UINT)
         return false;

      /* GFX9+ supports compression for both 32-bit and 16-bit depth
       * surfaces, while GFX8 only supports 32-bit natively. However,
       * the driver allows TC-compat HTILE for 16-bit depth surfaces
       * with no Z-plane compression.
       */
      if (format != VK_FORMAT_D32_SFLOAT_S8_UINT && format != VK_FORMAT_D32_SFLOAT &&
          format != VK_FORMAT_D16_UNORM)
         return false;
   }

   return true;
}

static bool
radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
{
   if (info->bo_metadata) {
      if (device->physical_device->rad_info.chip_class >= GFX9)
         return info->bo_metadata->u.gfx9.scanout;
      else
         return info->bo_metadata->u.legacy.scanout;
   }

   return info->scanout;
}

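/* The "early" variant below is usable before the surface layout has been
 * computed; radv_image_use_fast_clear_for_image() additionally depends on
 * layout-derived information such as DCC image-store support. */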
static bool
radv_image_use_fast_clear_for_image_early(const struct radv_device *device,
                                          const struct radv_image *image)
{
   if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
      return true;

   if (image->info.samples <= 1 && image->info.width * image->info.height <= 512 * 512) {
      /* Do not enable CMASK or DCC for small surfaces where the cost
       * of the eliminate pass can be higher than the benefit of fast
       * clear. RadeonSI does this, but the image threshold is
       * different.
       */
      return false;
   }

   return !!(image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
}

static bool
radv_image_use_fast_clear_for_image(const struct radv_device *device,
                                    const struct radv_image *image)
{
   if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
      return true;

   return radv_image_use_fast_clear_for_image_early(device, image) &&
          (image->exclusive ||
           /* Enable DCC for concurrent images if stores are
            * supported because that means we can keep DCC compressed on
            * all layouts/queues.
            */
           radv_image_use_dcc_image_stores(device, image));
}

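/* Check that the format and, for mutable images, every view format from
 * VkImageFormatListCreateInfo can share the same DCC encoding;
 * *sign_reinterpret is set when a signed<->unsigned reinterpretation is
 * involved. */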
bool
radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext,
                                VkFormat format, VkImageCreateFlags flags, bool *sign_reinterpret)
{
   bool blendable;

   if (!radv_is_colorbuffer_format_supported(pdev, format, &blendable))
      return false;

   if (sign_reinterpret != NULL)
      *sign_reinterpret = false;

   if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
      const struct VkImageFormatListCreateInfo *format_list =
         (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(
            pNext, IMAGE_FORMAT_LIST_CREATE_INFO);

      /* We have to ignore the existence of the list if viewFormatCount = 0 */
      if (format_list && format_list->viewFormatCount) {
         /* compatibility is transitive, so we only need to check
          * one format with everything else. */
         for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
            if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
               continue;

            if (!radv_dcc_formats_compatible(format, format_list->pViewFormats[i],
                                             sign_reinterpret))
               return false;
         }
      } else {
         return false;
      }
   }

   return true;
}

static bool
radv_format_is_atomic_allowed(struct radv_device *device, VkFormat format)
{
   if (format == VK_FORMAT_R32_SFLOAT && !device->image_float32_atomics)
      return false;

   return radv_is_atomic_format_supported(format);
}

static bool
radv_formats_is_atomic_allowed(struct radv_device *device, const void *pNext, VkFormat format,
                               VkImageCreateFlags flags)
{
   if (radv_format_is_atomic_allowed(device, format))
      return true;

   if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
      const struct VkImageFormatListCreateInfo *format_list =
         (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(
            pNext, IMAGE_FORMAT_LIST_CREATE_INFO);

      /* We have to ignore the existence of the list if viewFormatCount = 0 */
      if (format_list && format_list->viewFormatCount) {
         for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
            if (radv_format_is_atomic_allowed(device, format_list->pViewFormats[i]))
               return true;
         }
      }
   }

   return false;
}

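/* Early DCC decision, made from the create info alone before the surface
 * layout is known; radv_use_dcc_for_image_late() re-validates the choice
 * once the layout has been computed. */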
static bool
radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *image,
                             const VkImageCreateInfo *pCreateInfo, VkFormat format,
                             bool *sign_reinterpret)
{
   /* DCC (Delta Color Compression) is only available for GFX8+. */
   if (device->physical_device->rad_info.chip_class < GFX8)
      return false;

   if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
      return false;

   if (image->shareable && image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return false;

   /*
    * TODO: Enable DCC for storage images on GFX9 and earlier.
    *
    * Also disable DCC with atomics because even when DCC stores are
    * supported atomics will always decompress. So if we are
    * decompressing a lot anyway we might as well not have DCC.
    */
   if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
       (device->physical_device->rad_info.chip_class < GFX10 ||
        radv_formats_is_atomic_allowed(device, pCreateInfo->pNext, format, pCreateInfo->flags)))
      return false;

   /* Do not enable DCC for fragment shading rate attachments. */
   if (pCreateInfo->usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)
      return false;

   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   if (vk_format_is_subsampled(format) || vk_format_get_plane_count(format) > 1)
      return false;

   if (!radv_image_use_fast_clear_for_image_early(device, image) &&
       image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return false;

   /* Do not enable DCC for mipmapped arrays because performance is worse. */
   if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
      return false;

   if (device->physical_device->rad_info.chip_class < GFX10) {
      /* TODO: Add support for DCC MSAA on GFX8-9. */
      if (pCreateInfo->samples > 1 && !device->physical_device->dcc_msaa_allowed)
         return false;

      /* TODO: Add support for DCC layers/mipmaps on GFX9. */
      if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
          device->physical_device->rad_info.chip_class == GFX9)
         return false;
   }

   return radv_are_formats_dcc_compatible(device->physical_device, pCreateInfo->pNext, format,
                                          pCreateInfo->flags, sign_reinterpret);
}

static bool
radv_use_dcc_for_image_late(struct radv_device *device, struct radv_image *image)
{
   if (!radv_image_has_dcc(image))
      return false;

   if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return true;

   if (!radv_image_use_fast_clear_for_image(device, image))
      return false;

   /* TODO: Fix storage images with DCC without DCC image stores.
    * Disabling it for now. */
   if ((image->usage & VK_IMAGE_USAGE_STORAGE_BIT) && !radv_image_use_dcc_image_stores(device, image))
      return false;

   return true;
}

/*
 * Whether to enable image stores with DCC compression for this image. If
 * this function returns false the image subresource should be decompressed
 * before using it with image stores.
 *
 * Note that this can have mixed performance implications, see
 * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6796#note_643299
 *
 * This function assumes the image uses DCC compression.
 */
bool
radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image)
{
   return ac_surface_supports_dcc_image_stores(device->physical_device->rad_info.chip_class,
                                               &image->planes[0].surface);
}

/*
 * Whether to use a predicate to determine whether DCC is in a compressed
 * state. This can be used to avoid decompressing an image multiple times.
 */
bool
radv_image_use_dcc_predication(const struct radv_device *device, const struct radv_image *image)
{
   return radv_image_has_dcc(image) && !radv_image_use_dcc_image_stores(device, image);
}

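/* FMASK is the MSAA color compression metadata; it only helps multisampled
 * color attachments (or when compression is forced for debugging). */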
static inline bool
radv_use_fmask_for_image(const struct radv_device *device, const struct radv_image *image)
{
   return image->info.samples > 1 && ((image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
                                      (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
}

static inline bool
radv_use_htile_for_image(const struct radv_device *device, const struct radv_image *image)
{
   /* TODO:
    * - Investigate about mips+layers.
    * - Enable on other gens.
    */
   bool use_htile_for_mips =
      image->info.array_size == 1 && device->physical_device->rad_info.chip_class >= GFX10;

   /* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */
   if (device->physical_device->rad_info.chip_class == GFX10 &&
       image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT && image->info.levels > 1)
      return false;

   /* Do not enable HTILE for very small images because it seems less performant,
    * but make sure it stays enabled for VRS attachments because they require HTILE.
    */
   if (image->info.width * image->info.height < 8 * 8 &&
       !(device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS) &&
       !device->attachment_vrs_enabled)
      return false;

   return (image->info.levels == 1 || use_htile_for_mips) && !image->shareable;
}

static bool
radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image *image)
{
   /* TC-compat CMASK is only available for GFX8+. */
   if (device->physical_device->rad_info.chip_class < GFX8)
      return false;

   if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK)
      return false;

   if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
      return false;

   /* Do not enable TC-compatible CMASK if the image isn't readable by a shader
    * because no texture fetches will happen.
    */
   if (!(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
                         VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
      return false;

   /* If the image doesn't have FMASK, it can't be fetchable. */
   if (!radv_image_has_fmask(image))
      return false;

   return true;
}

static uint32_t
si_get_bo_metadata_word1(const struct radv_device *device)
{
   return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
}

static bool
radv_is_valid_opaque_metadata(const struct radv_device *device, const struct radeon_bo_metadata *md)
{
   if (md->metadata[0] != 1 || md->metadata[1] != si_get_bo_metadata_word1(device))
      return false;

   if (md->size_metadata < 40)
      return false;

   return true;
}

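/* Override the tiling parameters chosen by the surface allocator with the
 * ones recorded in the imported BO metadata. */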
static void
radv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf *surface,
                                 const struct radeon_bo_metadata *md)
{
   surface->flags = RADEON_SURF_CLR(surface->flags, MODE);

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      if (md->u.gfx9.swizzle_mode > 0)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

      surface->u.gfx9.swizzle_mode = md->u.gfx9.swizzle_mode;
   } else {
      surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
      surface->u.legacy.bankw = md->u.legacy.bankw;
      surface->u.legacy.bankh = md->u.legacy.bankh;
      surface->u.legacy.tile_split = md->u.legacy.tile_split;
      surface->u.legacy.mtilea = md->u.legacy.mtilea;
      surface->u.legacy.num_banks = md->u.legacy.num_banks;

      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
   }
}

static VkResult
radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image,
                            const struct radv_image_create_info *create_info,
                            struct ac_surf_info *image_info)
{
   unsigned width = image->info.width;
   unsigned height = image->info.height;

   /*
    * minigbm sometimes allocates bigger images, which results in weird
    * strides and other properties. Let's be lenient where possible and
    * fail it on GFX10 (as we cannot cope there).
    *
    * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
    */
   if (create_info->bo_metadata &&
       radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
      const struct radeon_bo_metadata *md = create_info->bo_metadata;

      if (device->physical_device->rad_info.chip_class >= GFX10) {
         width = G_00A004_WIDTH_LO(md->metadata[3]) + (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
         height = G_00A008_HEIGHT(md->metadata[4]) + 1;
      } else {
         width = G_008F18_WIDTH(md->metadata[4]) + 1;
         height = G_008F18_HEIGHT(md->metadata[4]) + 1;
      }
   }

   if (image->info.width == width && image->info.height == height)
      return VK_SUCCESS;

   if (width < image->info.width || height < image->info.height) {
      fprintf(stderr,
              "The imported image has smaller dimensions than the internal\n"
              "dimensions. Using it is going to fail badly, so we reject\n"
              "this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else if (device->physical_device->rad_info.chip_class >= GFX10) {
      fprintf(stderr,
              "Tried to import an image with inconsistent width on GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency in width and will fail this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else {
      fprintf(stderr,
              "Tried to import an image with inconsistent width on pre-GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency and would fail on GFX10.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
   }
   image_info->width = width;
   image_info->height = height;

   return VK_SUCCESS;
}

static VkResult
radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *image,
                                 const struct radv_image_create_info *create_info,
                                 struct ac_surf_info *image_info)
{
   VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned plane = 0; plane < image->plane_count; ++plane) {
      if (create_info->bo_metadata) {
         radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
                                          create_info->bo_metadata);
      }

      if (radv_surface_has_scanout(device, create_info)) {
         image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
         if (device->instance->debug_flags & RADV_DEBUG_NO_DISPLAY_DCC)
            image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;

         image->info.surf_index = NULL;
      }
   }
   return VK_SUCCESS;
}

static uint64_t
radv_get_surface_flags(struct radv_device *device, struct radv_image *image, unsigned plane_id,
                       const VkImageCreateInfo *pCreateInfo, VkFormat image_format)
{
   uint64_t flags;
   unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
   VkFormat format = vk_format_get_plane_format(image_format, plane_id);
   const struct util_format_description *desc = vk_format_description(format);
   bool is_depth, is_stencil;

   is_depth = util_format_has_depth(desc);
   is_stencil = util_format_has_stencil(desc);

   flags = RADEON_SURF_SET(array_mode, MODE);

   switch (pCreateInfo->imageType) {
   case VK_IMAGE_TYPE_1D:
      if (pCreateInfo->arrayLayers > 1)
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
      else
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
      break;
   case VK_IMAGE_TYPE_2D:
      if (pCreateInfo->arrayLayers > 1)
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
      else
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
      break;
   case VK_IMAGE_TYPE_3D:
      flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
      break;
   default:
      unreachable("unhandled image type");
   }

   /* Required for clearing/initializing a specific layer on GFX8. */
   flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;

   if (is_depth) {
      flags |= RADEON_SURF_ZBUFFER;

      if (radv_use_htile_for_image(device, image) &&
          !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
         if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
            flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
      } else {
         flags |= RADEON_SURF_NO_HTILE;
      }
   }

   if (is_stencil)
      flags |= RADEON_SURF_SBUFFER;

   if (device->physical_device->rad_info.chip_class >= GFX9 &&
       pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
       vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format))
      flags |= RADEON_SURF_NO_RENDER_TARGET;

   if (!radv_use_dcc_for_image_early(device, image, pCreateInfo, image_format,
                                     &image->dcc_sign_reinterpret))
      flags |= RADEON_SURF_DISABLE_DCC;

   if (!radv_use_fmask_for_image(device, image))
      flags |= RADEON_SURF_NO_FMASK;

   if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
      flags |=
         RADEON_SURF_PRT | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE | RADEON_SURF_DISABLE_DCC;
   }

   return flags;
}

static inline unsigned
si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
{
   if (stencil)
      return plane->surface.u.legacy.zs.stencil_tiling_index[level];
   else
      return plane->surface.u.legacy.tiling_index[level];
}

static unsigned
radv_map_swizzle(unsigned swizzle)
{
   switch (swizzle) {
   case PIPE_SWIZZLE_Y:
      return V_008F0C_SQ_SEL_Y;
   case PIPE_SWIZZLE_Z:
      return V_008F0C_SQ_SEL_Z;
   case PIPE_SWIZZLE_W:
      return V_008F0C_SQ_SEL_W;
   case PIPE_SWIZZLE_0:
      return V_008F0C_SQ_SEL_0;
   case PIPE_SWIZZLE_1:
      return V_008F0C_SQ_SEL_1;
   default: /* PIPE_SWIZZLE_X */
      return V_008F0C_SQ_SEL_X;
   }
}

static void
radv_compose_swizzle(const struct util_format_description *desc, const VkComponentMapping *mapping,
                     enum pipe_swizzle swizzle[4])
{
   if (desc->format == PIPE_FORMAT_R64_UINT || desc->format == PIPE_FORMAT_R64_SINT) {
      /* 64-bit formats only support storage images and storage images
       * require identity component mappings. We use 32-bit
       * instructions to access 64-bit images, so we need a special
       * case here.
       *
       * The zw components are 1,0 so that they can easily be used
       * by loads to create the w component, which has to be 0 for
       * NULL descriptors.
       */
      swizzle[0] = PIPE_SWIZZLE_X;
      swizzle[1] = PIPE_SWIZZLE_Y;
      swizzle[2] = PIPE_SWIZZLE_1;
      swizzle[3] = PIPE_SWIZZLE_0;
   } else if (!mapping) {
      for (unsigned i = 0; i < 4; i++)
         swizzle[i] = desc->swizzle[i];
   } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
      const unsigned char swizzle_xxxx[4] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0,
                                             PIPE_SWIZZLE_1};
      vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
   } else {
      vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
   }
}

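/* Fill the 4-dword typed buffer descriptor: dwords 0-1 hold the 48-bit
 * virtual address and the stride, dword 2 the number of records and dword 3
 * the destination swizzle plus the generation-specific format fields. */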
static void
radv_make_buffer_descriptor(struct radv_device *device, struct radv_buffer *buffer,
                            VkFormat vk_format, unsigned offset, unsigned range, uint32_t *state)
{
   const struct util_format_description *desc;
   unsigned stride;
   uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
   uint64_t va = gpu_address + buffer->offset;
   unsigned num_format, data_format;
   int first_non_void;
   enum pipe_swizzle swizzle[4];
   desc = vk_format_description(vk_format);
   first_non_void = vk_format_get_first_non_void_channel(vk_format);
   stride = desc->block.bits / 8;

   radv_compose_swizzle(desc, NULL, swizzle);

   va += offset;
   state[0] = va;
   state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);

   if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
      range /= stride;
   }

   state[2] = range;
   state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
              S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
              S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
              S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3]));

   if (device->physical_device->rad_info.chip_class >= GFX10) {
      const struct gfx10_format *fmt = &gfx10_format_table[vk_format_to_pipe_format(vk_format)];

      /* OOB_SELECT chooses the out-of-bounds check:
       * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
       * - 1: index >= NUM_RECORDS
       * - 2: NUM_RECORDS == 0
       * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
       *      else: swizzle_address >= NUM_RECORDS
       */
      state[3] |= S_008F0C_FORMAT(fmt->img_format) |
                  S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
                  S_008F0C_RESOURCE_LEVEL(1);
   } else {
      num_format = radv_translate_buffer_numformat(desc, first_non_void);
      data_format = radv_translate_buffer_dataformat(desc, first_non_void);

      assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
      assert(num_format != ~0);

      state[3] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format);
   }
}

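/* Patch the image descriptor fields that depend on where the image is bound:
 * the base address, tile swizzle and the DCC/HTILE metadata address, which
 * change whenever the backing memory or base mip level changes. */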
static void
si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *image,
                               const struct legacy_surf_level *base_level_info, unsigned plane_id,
                               unsigned base_level, unsigned first_level, unsigned block_width,
                               bool is_stencil, bool is_storage_image, bool disable_compression,
                               bool enable_write_compression, uint32_t *state)
{
   struct radv_image_plane *plane = &image->planes[plane_id];
   uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
   uint64_t va = gpu_address;
   enum chip_class chip_class = device->physical_device->rad_info.chip_class;
   uint64_t meta_va = 0;
   if (chip_class >= GFX9) {
      if (is_stencil)
         va += plane->surface.u.gfx9.zs.stencil_offset;
      else
         va += plane->surface.u.gfx9.surf_offset;
   } else
      va += (uint64_t)base_level_info->offset_256B * 256;

   state[0] = va >> 8;
   if (chip_class >= GFX9 || base_level_info->mode == RADEON_SURF_MODE_2D)
      state[0] |= plane->surface.tile_swizzle;
   state[1] &= C_008F14_BASE_ADDRESS_HI;
   state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

   if (chip_class >= GFX8) {
      state[6] &= C_008F28_COMPRESSION_EN;
      state[7] = 0;
      if (!disable_compression && radv_dcc_enabled(image, first_level)) {
         meta_va = gpu_address + plane->surface.meta_offset;
         if (chip_class <= GFX8)
            meta_va += plane->surface.u.legacy.color.dcc_level[base_level].dcc_offset;

         unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
         dcc_tile_swizzle &= (1 << plane->surface.meta_alignment_log2) - 1;
         meta_va |= dcc_tile_swizzle;
      } else if (!disable_compression && radv_image_is_tc_compat_htile(image)) {
         meta_va = gpu_address + plane->surface.meta_offset;
      }

      if (meta_va) {
         state[6] |= S_008F28_COMPRESSION_EN(1);
         if (chip_class <= GFX9)
            state[7] = meta_va >> 8;
      }
   }

   if (chip_class >= GFX10) {
      state[3] &= C_00A00C_SW_MODE;

      if (is_stencil) {
         state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.zs.stencil_swizzle_mode);
      } else {
         state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.swizzle_mode);
      }

      state[6] &= C_00A018_META_DATA_ADDRESS_LO & C_00A018_META_PIPE_ALIGNED;

      if (meta_va) {
         struct gfx9_surf_meta_flags meta = {
            .rb_aligned = 1,
            .pipe_aligned = 1,
         };

         if (!(plane->surface.flags & RADEON_SURF_Z_OR_SBUFFER))
            meta = plane->surface.u.gfx9.color.dcc;

         if (radv_dcc_enabled(image, first_level) && is_storage_image && enable_write_compression)
            state[6] |= S_00A018_WRITE_COMPRESS_ENABLE(1);

         state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
                     S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
      }

      state[7] = meta_va >> 16;
   } else if (chip_class == GFX9) {
      state[3] &= C_008F1C_SW_MODE;
      state[4] &= C_008F20_PITCH;

      if (is_stencil) {
         state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.zs.stencil_swizzle_mode);
         state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.zs.stencil_epitch);
      } else {
         state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.swizzle_mode);
         state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.epitch);
      }

      state[5] &=
         C_008F24_META_DATA_ADDRESS & C_008F24_META_PIPE_ALIGNED & C_008F24_META_RB_ALIGNED;
      if (meta_va) {
         struct gfx9_surf_meta_flags meta = {
            .rb_aligned = 1,
            .pipe_aligned = 1,
         };

         if (!(plane->surface.flags & RADEON_SURF_Z_OR_SBUFFER))
            meta = plane->surface.u.gfx9.color.dcc;

         state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
                     S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
                     S_008F24_META_RB_ALIGNED(meta.rb_aligned);
      }
   } else {
      /* GFX6-GFX8 */
      unsigned pitch = base_level_info->nblk_x * block_width;
      unsigned index = si_tile_mode_index(plane, base_level, is_stencil);

      state[3] &= C_008F1C_TILING_INDEX;
      state[3] |= S_008F1C_TILING_INDEX(index);
      state[4] &= C_008F20_PITCH;
      state[4] |= S_008F20_PITCH(pitch - 1);
   }
}

static unsigned
radv_tex_dim(VkImageType image_type, VkImageViewType view_type, unsigned nr_layers,
             unsigned nr_samples, bool is_storage_image, bool gfx9)
{
   if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
      return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;

   /* GFX9 allocates 1D textures as 2D. */
   if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
      image_type = VK_IMAGE_TYPE_2D;
   switch (image_type) {
   case VK_IMAGE_TYPE_1D:
      return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
   case VK_IMAGE_TYPE_2D:
      if (nr_samples > 1)
         return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
      else
         return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
   case VK_IMAGE_TYPE_3D:
      if (view_type == VK_IMAGE_VIEW_TYPE_3D)
         return V_008F1C_SQ_RSRC_IMG_3D;
      else
         return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
   default:
      unreachable("illegal image type");
   }
}

static unsigned
gfx9_border_color_swizzle(const struct util_format_description *desc)
{
   unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;

   if (desc->swizzle[3] == PIPE_SWIZZLE_X) {
      /* For the pre-defined border color values (white, opaque
       * black, transparent black), the only thing that matters is
       * that the alpha channel winds up in the correct place
       * (because the RGB channels are all the same) so either of
       * these enumerations will work.
       */
      if (desc->swizzle[2] == PIPE_SWIZZLE_Y)
         bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
      else
         bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
   } else if (desc->swizzle[0] == PIPE_SWIZZLE_X) {
      if (desc->swizzle[1] == PIPE_SWIZZLE_Y)
         bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
      else
         bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
   } else if (desc->swizzle[1] == PIPE_SWIZZLE_X) {
      bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
   } else if (desc->swizzle[2] == PIPE_SWIZZLE_X) {
      bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
   }

   return bc_swizzle;
}

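/* Whether the alpha channel is stored in the most significant bits of the
 * format, which determines how DCC encodes the clear color. */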
bool
vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
{
   const struct util_format_description *desc = vk_format_description(format);

   if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
      return desc->swizzle[3] == PIPE_SWIZZLE_X;

   return radv_translate_colorswap(format, false) <= 1;
}

/**
 * Build the sampler view descriptor for a texture (GFX10).
 */
static void
gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
                              bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
                              const VkComponentMapping *mapping, unsigned first_level,
                              unsigned last_level, unsigned first_layer, unsigned last_layer,
                              unsigned width, unsigned height, unsigned depth, uint32_t *state,
                              uint32_t *fmask_state)
{
   const struct util_format_description *desc;
   enum pipe_swizzle swizzle[4];
   unsigned img_format;
   unsigned type;

   desc = vk_format_description(vk_format);
   img_format = gfx10_format_table[vk_format_to_pipe_format(vk_format)].img_format;

   radv_compose_swizzle(desc, mapping, swizzle);

   type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
                       is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
   if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
      height = 1;
      depth = image->info.array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
      if (view_type != VK_IMAGE_VIEW_TYPE_3D)
         depth = image->info.array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
      depth = image->info.array_size / 6;

   state[0] = 0;
   state[1] = S_00A004_FORMAT(img_format) | S_00A004_WIDTH_LO(width - 1);
   state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
              S_00A008_RESOURCE_LEVEL(1);
   state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
              S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
              S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
              S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
              S_00A00C_BASE_LEVEL(image->info.samples > 1 ? 0 : first_level) |
              S_00A00C_LAST_LEVEL(image->info.samples > 1 ? util_logbase2(image->info.samples)
                                                          : last_level) |
              S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc)) | S_00A00C_TYPE(type);
   /* Depth is the last accessible layer on GFX9+. The hw doesn't need
    * to know the total number of layers.
    */
   state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
              S_00A010_BASE_ARRAY(first_layer);
   state[5] = S_00A014_ARRAY_PITCH(0) |
              S_00A014_MAX_MIP(image->info.samples > 1 ? util_logbase2(image->info.samples)
                                                       : image->info.levels - 1) |
              S_00A014_PERF_MOD(4);
   state[6] = 0;
   state[7] = 0;

   if (radv_dcc_enabled(image, first_level)) {
      state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
                  S_00A018_MAX_COMPRESSED_BLOCK_SIZE(
                     image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size) |
                  S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
   }

   if (radv_image_get_iterate256(device, image)) {
      state[6] |= S_00A018_ITERATE_256(1);
   }

   /* Initialize the sampler view for FMASK. */
   if (fmask_state) {
      if (radv_image_has_fmask(image)) {
         uint64_t gpu_address = radv_buffer_get_va(image->bo);
         uint32_t format;
         uint64_t va;

         assert(image->plane_count == 1);

         va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;

         switch (image->info.samples) {
         case 2:
            format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F2;
            break;
         case 4:
            format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F4;
            break;
         case 8:
            format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F8;
            break;
         default:
            unreachable("invalid nr_samples");
         }

         fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
         fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | S_00A004_FORMAT(format) |
                          S_00A004_WIDTH_LO(width - 1);
         fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
                          S_00A008_RESOURCE_LEVEL(1);
         fmask_state[3] =
            S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
            S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
            S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode) |
            S_00A00C_TYPE(
               radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
         fmask_state[4] = S_00A010_DEPTH(last_layer) | S_00A010_BASE_ARRAY(first_layer);
         fmask_state[5] = 0;
         fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
         fmask_state[7] = 0;

         if (radv_image_is_tc_compat_cmask(image)) {
            va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;

            fmask_state[6] |= S_00A018_COMPRESSION_EN(1);
            fmask_state[6] |= S_00A018_META_DATA_ADDRESS_LO(va >> 8);
            fmask_state[7] |= va >> 16;
         }
      } else
         memset(fmask_state, 0, 8 * 4);
   }
}

/**
 * Build the sampler view descriptor for a texture (SI-GFX9).
 */
static void
si_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
                           bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
                           const VkComponentMapping *mapping, unsigned first_level,
                           unsigned last_level, unsigned first_layer, unsigned last_layer,
                           unsigned width, unsigned height, unsigned depth, uint32_t *state,
                           uint32_t *fmask_state)
{
   const struct util_format_description *desc;
   enum pipe_swizzle swizzle[4];
   int first_non_void;
   unsigned num_format, data_format, type;

   desc = vk_format_description(vk_format);

   radv_compose_swizzle(desc, mapping, swizzle);

   first_non_void = vk_format_get_first_non_void_channel(vk_format);

   num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
   if (num_format == ~0) {
      num_format = 0;
   }

   data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
   if (data_format == ~0) {
      data_format = 0;
   }

   /* S8 with either Z16 or Z32 HTILE needs a special format. */
   if (device->physical_device->rad_info.chip_class == GFX9 && vk_format == VK_FORMAT_S8_UINT &&
       radv_image_is_tc_compat_htile(image)) {
      if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
         data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
      else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
         data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
   }
   type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
                       is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
   if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
      height = 1;
      depth = image->info.array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
      if (view_type != VK_IMAGE_VIEW_TYPE_3D)
         depth = image->info.array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
      depth = image->info.array_size / 6;

   state[0] = 0;
   state[1] = (S_008F14_DATA_FORMAT(data_format) | S_008F14_NUM_FORMAT(num_format));
   state[2] = (S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1) | S_008F18_PERF_MOD(4));
   state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
               S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
               S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
               S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
               S_008F1C_BASE_LEVEL(image->info.samples > 1 ? 0 : first_level) |
               S_008F1C_LAST_LEVEL(image->info.samples > 1 ? util_logbase2(image->info.samples)
                                                           : last_level) |
               S_008F1C_TYPE(type));
   state[4] = 0;
   state[5] = S_008F24_BASE_ARRAY(first_layer);
   state[6] = 0;
   state[7] = 0;

   if (device->physical_device->rad_info.chip_class == GFX9) {
      unsigned bc_swizzle = gfx9_border_color_swizzle(desc);

      /* Depth is the last accessible layer on GFX9.
       * The hw doesn't need to know the total number of layers.
       */
      if (type == V_008F1C_SQ_RSRC_IMG_3D)
         state[4] |= S_008F20_DEPTH(depth - 1);
      else
         state[4] |= S_008F20_DEPTH(last_layer);

      state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
      state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ? util_logbase2(image->info.samples)
                                                           : image->info.levels - 1);
   } else {
      state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
      state[4] |= S_008F20_DEPTH(depth - 1);
      state[5] |= S_008F24_LAST_ARRAY(last_layer);
   }
   if (!(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) &&
       image->planes[0].surface.meta_offset) {
      state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
   } else {
      /* The last dword is unused by hw. The shader uses it to clear
       * bits in the first dword of sampler state.
       */
      if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
         if (first_level == last_level)
            state[7] = C_008F30_MAX_ANISO_RATIO;
         else
            state[7] = 0xffffffff;
      }
   }

   /* Initialize the sampler view for FMASK. */
   if (fmask_state) {
      if (radv_image_has_fmask(image)) {
         uint32_t fmask_format;
         uint64_t gpu_address = radv_buffer_get_va(image->bo);
         uint64_t va;

         assert(image->plane_count == 1);

         va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;

         if (device->physical_device->rad_info.chip_class == GFX9) {
            fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
            switch (image->info.samples) {
            case 2:
               num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2;
               break;
            case 4:
               num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4;
               break;
            case 8:
               num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8;
               break;
            default:
               unreachable("invalid nr_samples");
            }
         } else {
            switch (image->info.samples) {
            case 2:
               fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
               break;
            case 4:
               fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
               break;
            case 8:
               fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
               break;
            default:
               assert(0);
               fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
            }
            num_format = V_008F14_IMG_NUM_FORMAT_UINT;
         }

         fmask_state[0] = va >> 8;
         fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
         fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | S_008F14_DATA_FORMAT(fmask_format) |
                          S_008F14_NUM_FORMAT(num_format);
         fmask_state[2] = S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1);
         fmask_state[3] =
            S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
            S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
            S_008F1C_TYPE(
               radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
         fmask_state[4] = 0;
         fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
         fmask_state[6] = 0;
         fmask_state[7] = 0;

         if (device->physical_device->rad_info.chip_class == GFX9) {
            fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode);
            fmask_state[4] |= S_008F20_DEPTH(last_layer) |
                              S_008F20_PITCH(image->planes[0].surface.u.gfx9.color.fmask_epitch);
            fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) | S_008F24_META_RB_ALIGNED(1);

            if (radv_image_is_tc_compat_cmask(image)) {
               va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;

               fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
               fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
               fmask_state[7] |= va >> 8;
            }
         } else {
            fmask_state[3] |=
               S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.color.fmask.tiling_index);
            fmask_state[4] |=
               S_008F20_DEPTH(depth - 1) |
               S_008F20_PITCH(image->planes[0].surface.u.legacy.color.fmask.pitch_in_pixels - 1);
            fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);

            if (radv_image_is_tc_compat_cmask(image)) {
               va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;

               fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
               fmask_state[7] |= va >> 8;
            }
         }
      } else
         memset(fmask_state, 0, 8 * 4);
   }
}

static void
radv_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
                             bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
                             const VkComponentMapping *mapping, unsigned first_level,
                             unsigned last_level, unsigned first_layer, unsigned last_layer,
                             unsigned width, unsigned height, unsigned depth, uint32_t *state,
                             uint32_t *fmask_state)
{
   if (device->physical_device->rad_info.chip_class >= GFX10) {
      gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
                                    first_level, last_level, first_layer, last_layer, width, height,
                                    depth, state, fmask_state);
   } else {
      si_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
                                 first_level, last_level, first_layer, last_layer, width, height,
                                 depth, state, fmask_state);
   }
}

static void
radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image,
                           struct radeon_bo_metadata *md)
{
   static const VkComponentMapping fixedmapping;
   uint32_t desc[8];

   assert(image->plane_count == 1);

   radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->type,
                                image->vk_format, &fixedmapping, 0, image->info.levels - 1, 0,
                                image->info.array_size - 1, image->info.width, image->info.height,
                                image->info.depth, desc, NULL);

   si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0,
                                  0, image->planes[0].surface.blk_w, false, false, false, false,
                                  desc);

   ac_surface_get_umd_metadata(&device->physical_device->rad_info, &image->planes[0].surface,
                               image->info.levels, desc, &md->size_metadata, md->metadata);
}

void
radv_init_metadata(struct radv_device *device, struct radv_image *image,
                   struct radeon_bo_metadata *metadata)
{
   struct radeon_surf *surface = &image->planes[0].surface;

   memset(metadata, 0, sizeof(*metadata));

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      uint64_t dcc_offset =
         image->offset +
         (surface->display_dcc_offset ? surface->display_dcc_offset : surface->meta_offset);
      metadata->u.gfx9.swizzle_mode = surface->u.gfx9.swizzle_mode;
      metadata->u.gfx9.dcc_offset_256b = dcc_offset >> 8;
      metadata->u.gfx9.dcc_pitch_max = surface->u.gfx9.color.display_dcc_pitch_max;
      metadata->u.gfx9.dcc_independent_64b_blocks = surface->u.gfx9.color.dcc.independent_64B_blocks;
      metadata->u.gfx9.dcc_independent_128b_blocks = surface->u.gfx9.color.dcc.independent_128B_blocks;
      metadata->u.gfx9.dcc_max_compressed_block_size =
         surface->u.gfx9.color.dcc.max_compressed_block_size;
      metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
   } else {
      metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D
                                        ? RADEON_LAYOUT_TILED
                                        : RADEON_LAYOUT_LINEAR;
      metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D
                                        ? RADEON_LAYOUT_TILED
                                        : RADEON_LAYOUT_LINEAR;
      metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
      metadata->u.legacy.bankw = surface->u.legacy.bankw;
      metadata->u.legacy.bankh = surface->u.legacy.bankh;
      metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
      metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
      metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
      metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
      metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
   }
   radv_query_opaque_metadata(device, image, metadata);
}

void
radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image,
                                  uint64_t offset, uint32_t stride)
{
   ac_surface_override_offset_stride(&device->physical_device->rad_info, &image->planes[0].surface,
                                     image->info.levels, offset, stride);
}

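/* Append CMASK storage at the end of the surface so single-sample color
 * images can use (eliminate-based) fast clears when DCC isn't available. */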
static void
radv_image_alloc_single_sample_cmask(const struct radv_device *device,
                                     const struct radv_image *image, struct radeon_surf *surf)
{
   if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 || image->info.levels > 1 ||
       image->info.depth > 1 || radv_image_has_dcc(image) ||
       !radv_image_use_fast_clear_for_image(device, image) ||
       (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT))
      return;

   assert(image->info.storage_samples == 1);

   surf->cmask_offset = align64(surf->total_size, 1 << surf->cmask_alignment_log2);
   surf->total_size = surf->cmask_offset + surf->cmask_size;
   surf->alignment_log2 = MAX2(surf->alignment_log2, surf->cmask_alignment_log2);
}

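/* Reserve space at the end of the image for driver-managed metadata values:
 * fast-clear eliminate predicates, DCC predicates, clear values and the
 * TC-compat zrange workaround dword, one slot per mip level. */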
static void
radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
{
   /* Images with modifiers can potentially be imported. */
   if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return;

   if (radv_image_has_cmask(image) || (radv_image_has_dcc(image) && !image->support_comp_to_single)) {
      image->fce_pred_offset = image->size;
      image->size += 8 * image->info.levels;
   }

   if (radv_image_use_dcc_predication(device, image)) {
      image->dcc_pred_offset = image->size;
      image->size += 8 * image->info.levels;
   }

   if ((radv_image_has_dcc(image) && !image->support_comp_to_single) ||
       radv_image_has_cmask(image) || radv_image_has_htile(image)) {
      image->clear_value_offset = image->size;
      image->size += 8 * image->info.levels;
   }

   if (radv_image_is_tc_compat_htile(image) &&
       device->physical_device->rad_info.has_tc_compat_zrange_bug) {
      /* Metadata for the TC-compatible HTILE hardware bug that has
       * to be worked around by updating ZRANGE_PRECISION when doing
       * fast depth clears to 0.0f.
       */
      image->tc_compat_zrange_offset = image->size;
      image->size += image->info.levels * 4;
   }
}

1354 /* Determine if the image is affected by the pipe misaligned metadata issue
1355 * which requires to invalidate L2.
1356 */
1357 static bool
radv_image_is_pipe_misaligned(const struct radv_device * device,const struct radv_image * image)1358 radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
1359 {
1360 struct radeon_info *rad_info = &device->physical_device->rad_info;
1361 int log2_samples = util_logbase2(image->info.samples);
1362
1363 assert(rad_info->chip_class >= GFX10);
1364
1365 for (unsigned i = 0; i < image->plane_count; ++i) {
1366 VkFormat fmt = vk_format_get_plane_format(image->vk_format, i);
1367 int log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
1368 int log2_bpp_and_samples;
1369
1370 if (rad_info->chip_class >= GFX10_3) {
1371 log2_bpp_and_samples = log2_bpp + log2_samples;
1372 } else {
1373 if (vk_format_has_depth(image->vk_format) && image->info.array_size >= 8) {
1374 log2_bpp = 2;
1375 }
1376
1377 log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
1378 }
1379
1380 int num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config);
1381 int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);
1382
1383 if (vk_format_has_depth(image->vk_format)) {
1384 if (radv_image_is_tc_compat_htile(image) && overlap) {
1385 return true;
1386 }
1387 } else {
1388 int max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config);
1389 int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
1390 int samples_overlap = MIN2(log2_samples, overlap);
1391
1392 /* TODO: This shouldn't be necessary if the image has DCC but
1393 * isn't readable by shaders.
1394 */
1395 if ((radv_image_has_dcc(image) || radv_image_is_tc_compat_cmask(image)) &&
1396 (samples_overlap > log2_samples_frag_diff)) {
1397 return true;
1398 }
1399 }
1400 }
1401
1402 return false;
1403 }
1404
1405 static bool
1406 radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
1407 {
1408 if (device->physical_device->rad_info.chip_class >= GFX10) {
1409 return !device->physical_device->rad_info.tcc_rb_non_coherent &&
1410 !radv_image_is_pipe_misaligned(device, image);
1411 } else if (device->physical_device->rad_info.chip_class == GFX9) {
1412 if (image->info.samples == 1 &&
1413 (image->usage &
1414 (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
1415 !vk_format_has_stencil(image->vk_format)) {
1416 /* Single-sample color and single-sample depth
1417 * (not stencil) are coherent with shaders on
1418 * GFX9.
1419 */
1420 return true;
1421 }
1422 }
1423
1424 return false;
1425 }
1426
1427 /**
1428 * Determine if the given image can be fast cleared.
1429 */
1430 static bool
1431 radv_image_can_fast_clear(const struct radv_device *device, const struct radv_image *image)
1432 {
1433 if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
1434 return false;
1435
1436 if (vk_format_is_color(image->vk_format)) {
1437 if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
1438 return false;
1439
1440 /* RB+ doesn't work with CMASK fast clear on Stoney. */
1441 if (!radv_image_has_dcc(image) && device->physical_device->rad_info.family == CHIP_STONEY)
1442 return false;
1443 } else {
1444 if (!radv_image_has_htile(image))
1445 return false;
1446 }
1447
1448 /* Do not fast clear 3D images. */
1449 if (image->type == VK_IMAGE_TYPE_3D)
1450 return false;
1451
1452 return true;
1453 }
1454
1455 /**
1456 * Determine if the given image can be fast cleared using comp-to-single.
1457 */
1458 static bool
1459 radv_image_use_comp_to_single(const struct radv_device *device, const struct radv_image *image)
1460 {
1461 /* comp-to-single is only available for GFX10+. */
1462 if (device->physical_device->rad_info.chip_class < GFX10)
1463 return false;
1464
1465 /* If the image can't be fast cleared, comp-to-single can't be used. */
1466 if (!radv_image_can_fast_clear(device, image))
1467 return false;
1468
1469 /* If the image doesn't have DCC, it can't be fast cleared using comp-to-single. */
1470 if (!radv_image_has_dcc(image))
1471 return false;
1472
1473 /* It seems 8bpp and 16bpp require RB+ to work. */
1474 unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk_format);
1475 if (bytes_per_pixel <= 2 && !device->physical_device->rad_info.rbplus_allowed)
1476 return false;
1477
1478 return true;
1479 }
1480
1481 static void
1482 radv_image_reset_layout(struct radv_image *image)
1483 {
1484 image->size = 0;
1485 image->alignment = 1;
1486
1487 image->tc_compatible_cmask = 0;
1488 image->fce_pred_offset = image->dcc_pred_offset = 0;
1489 image->clear_value_offset = image->tc_compat_zrange_offset = 0;
1490
1491 for (unsigned i = 0; i < image->plane_count; ++i) {
1492 VkFormat format = vk_format_get_plane_format(image->vk_format, i);
1493 if (vk_format_has_depth(format))
1494 format = vk_format_depth_only(format);
1495
1496 uint64_t flags = image->planes[i].surface.flags;
1497 uint64_t modifier = image->planes[i].surface.modifier;
1498 memset(image->planes + i, 0, sizeof(image->planes[i]));
1499
1500 image->planes[i].surface.flags = flags;
1501 image->planes[i].surface.modifier = modifier;
1502 image->planes[i].surface.blk_w = vk_format_get_blockwidth(format);
1503 image->planes[i].surface.blk_h = vk_format_get_blockheight(format);
1504 image->planes[i].surface.bpe = vk_format_get_blocksize(format);
1505
1506 /* Align the bytes per element to a dword: 3-byte formats are laid out as 4 bytes. */
1507 if (image->planes[i].surface.bpe == 3) {
1508 image->planes[i].surface.bpe = 4;
1509 }
1510 }
1511 }
1512
1513 VkResult
1514 radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
1515 const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
1516 struct radv_image *image)
1517 {
1518 /* Clear the pCreateInfo pointer so that issues with the delayed (no create info)
1519 * case are already caught when testing the common internal case. */
1520 create_info.vk_info = NULL;
1521
1522 struct ac_surf_info image_info = image->info;
1523 VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
1524 if (result != VK_SUCCESS)
1525 return result;
1526
1527 assert(!mod_info || mod_info->drmFormatModifierPlaneCount >= image->plane_count);
1528
1529 radv_image_reset_layout(image);
1530
1531 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
1532 struct ac_surf_info info = image_info;
1533 uint64_t offset;
1534 unsigned stride;
1535
1536 info.width = vk_format_get_plane_width(image->vk_format, plane, info.width);
1537 info.height = vk_format_get_plane_height(image->vk_format, plane, info.height);
1538
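/* Multi-plane images and callers that explicitly opt out get no DCC/FMASK/HTILE metadata. */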
1539 if (create_info.no_metadata_planes || image->plane_count > 1) {
1540 image->planes[plane].surface.flags |=
1541 RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE;
1542 }
1543
1544 device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);
1545
1546 if (plane == 0) {
1547 if (!radv_use_dcc_for_image_late(device, image))
1548 ac_surface_zero_dcc_fields(&image->planes[0].surface);
1549 }
1550
1551 if (create_info.bo_metadata && !mod_info &&
1552 !ac_surface_set_umd_metadata(&device->physical_device->rad_info,
1553 &image->planes[plane].surface, image_info.storage_samples,
1554 image_info.levels, create_info.bo_metadata->size_metadata,
1555 create_info.bo_metadata->metadata))
1556 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
1557
1558 if (!create_info.no_metadata_planes && !create_info.bo_metadata && image->plane_count == 1 &&
1559 !mod_info)
1560 radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface);
1561
1562 if (mod_info) {
1563 if (mod_info->pPlaneLayouts[plane].rowPitch % image->planes[plane].surface.bpe ||
1564 !mod_info->pPlaneLayouts[plane].rowPitch)
1565 return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
1566
1567 offset = mod_info->pPlaneLayouts[plane].offset;
1568 stride = mod_info->pPlaneLayouts[plane].rowPitch / image->planes[plane].surface.bpe;
1569 } else {
1570 offset = align64(image->size, 1 << image->planes[plane].surface.alignment_log2);
1571 stride = 0; /* 0 means no override */
1572 }
1573
1574 if (!ac_surface_override_offset_stride(&device->physical_device->rad_info,
1575 &image->planes[plane].surface, image->info.levels,
1576 offset, stride))
1577 return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
1578
1579 /* Validate DCC offsets in modifier layout. */
1580 if (image->plane_count == 1 && mod_info) {
1581 unsigned mem_planes = ac_surface_get_nplanes(&image->planes[plane].surface);
1582 if (mod_info->drmFormatModifierPlaneCount != mem_planes)
1583 return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
1584
1585 for (unsigned i = 1; i < mem_planes; ++i) {
1586 if (ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
1587 &image->planes[plane].surface, i,
1588 0) != mod_info->pPlaneLayouts[i].offset)
1589 return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
1590 }
1591 }
1592
1593 image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size);
1594 image->alignment = MAX2(image->alignment, 1 << image->planes[plane].surface.alignment_log2);
1595
1596 image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
1597 }
1598
1599 image->tc_compatible_cmask =
1600 radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image);
1601
1602 image->l2_coherent = radv_image_is_l2_coherent(device, image);
1603
1604 image->support_comp_to_single = radv_image_use_comp_to_single(device, image);
1605
1606 radv_image_alloc_values(device, image);
1607
1608 assert(image->planes[0].surface.surf_size);
1609 assert(image->planes[0].surface.modifier == DRM_FORMAT_MOD_INVALID ||
1610 ac_modifier_has_dcc(image->planes[0].surface.modifier) == radv_image_has_dcc(image));
1611 return VK_SUCCESS;
1612 }
1613
1614 static void
1615 radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
1616 struct radv_image *image)
1617 {
1618 if ((image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bo)
1619 device->ws->buffer_destroy(device->ws, image->bo);
1620
1621 if (image->owned_memory != VK_NULL_HANDLE) {
1622 RADV_FROM_HANDLE(radv_device_memory, mem, image->owned_memory);
1623 radv_free_memory(device, pAllocator, mem);
1624 }
1625
1626 vk_object_base_finish(&image->base);
1627 vk_free2(&device->vk.alloc, pAllocator, image);
1628 }
1629
1630 static void
1631 radv_image_print_info(struct radv_device *device, struct radv_image *image)
1632 {
1633 fprintf(stderr, "Image:\n");
1634 fprintf(stderr,
1635 " Info: size=%" PRIu64 ", alignment=%" PRIu32 ", "
1636 "width=%" PRIu32 ", height=%" PRIu32 ", "
1637 "offset=%" PRIu64 ", array_size=%" PRIu32 "\n",
1638 image->size, image->alignment, image->info.width, image->info.height, image->offset,
1639 image->info.array_size);
1640 for (unsigned i = 0; i < image->plane_count; ++i) {
1641 const struct radv_image_plane *plane = &image->planes[i];
1642 const struct radeon_surf *surf = &plane->surface;
1643 const struct util_format_description *desc = vk_format_description(plane->format);
1644 uint64_t offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
1645 &plane->surface, 0, 0);
1646
1647 fprintf(stderr, " Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset);
1648
1649 ac_surface_print_info(stderr, &device->physical_device->rad_info, surf);
1650 }
1651 }
1652
1653 static uint64_t
1654 radv_select_modifier(const struct radv_device *dev, VkFormat format,
1655 const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list)
1656 {
1657 const struct radv_physical_device *pdev = dev->physical_device;
1658 unsigned mod_count;
1659
1660 assert(mod_list->drmFormatModifierCount);
1661
1662 /* We can allow everything here as it does not affect order and the application
1663 * is only allowed to specify modifiers that we support. */
1664 const struct ac_modifier_options modifier_options = {
1665 .dcc = true,
1666 .dcc_retile = true,
1667 };
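/* Query the number of supported modifiers first, then fetch the full list;
 * the list is expected to be ordered best-first, which is why the first
 * match below wins. */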
1668
1669 ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
1670 &mod_count, NULL);
1671
1672 uint64_t *mods = calloc(mod_count, sizeof(*mods));
1673
1674 /* If the allocation fails, fall back to the simpler choice of the application's first modifier. */
1675 if (!mods)
1676 return mod_list->pDrmFormatModifiers[0];
1677
1678 ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
1679 &mod_count, mods);
1680
1681 for (unsigned i = 0; i < mod_count; ++i) {
1682 for (uint32_t j = 0; j < mod_list->drmFormatModifierCount; ++j) {
1683 if (mods[i] == mod_list->pDrmFormatModifiers[j]) {
1684 free(mods);
1685 return mod_list->pDrmFormatModifiers[j];
1686 }
1687 }
1688 }
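/* Per VK_EXT_image_drm_format_modifier, the application may only list
 * modifiers the driver advertised as supported, so a match must exist. */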
1689 unreachable("App specified an invalid modifier");
1690 }
1691
1692 VkResult
1693 radv_image_create(VkDevice _device, const struct radv_image_create_info *create_info,
1694 const VkAllocationCallbacks *alloc, VkImage *pImage)
1695 {
1696 RADV_FROM_HANDLE(radv_device, device, _device);
1697 const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
1698 uint64_t modifier = DRM_FORMAT_MOD_INVALID;
1699 struct radv_image *image = NULL;
1700 VkFormat format = radv_select_android_external_format(pCreateInfo->pNext, pCreateInfo->format);
1701 const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list =
1702 vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
1703 const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod =
1704 vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT);
1705 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
1706
1707 const unsigned plane_count = vk_format_get_plane_count(format);
1708 const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;
1709
1710 radv_assert(pCreateInfo->mipLevels > 0);
1711 radv_assert(pCreateInfo->arrayLayers > 0);
1712 radv_assert(pCreateInfo->samples > 0);
1713 radv_assert(pCreateInfo->extent.width > 0);
1714 radv_assert(pCreateInfo->extent.height > 0);
1715 radv_assert(pCreateInfo->extent.depth > 0);
1716
1717 image =
1718 vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1719 if (!image)
1720 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1721
1722 vk_object_base_init(&device->vk, &image->base, VK_OBJECT_TYPE_IMAGE);
1723
1724 image->type = pCreateInfo->imageType;
1725 image->info.width = pCreateInfo->extent.width;
1726 image->info.height = pCreateInfo->extent.height;
1727 image->info.depth = pCreateInfo->extent.depth;
1728 image->info.samples = pCreateInfo->samples;
1729 image->info.storage_samples = pCreateInfo->samples;
1730 image->info.array_size = pCreateInfo->arrayLayers;
1731 image->info.levels = pCreateInfo->mipLevels;
1732 image->info.num_channels = vk_format_get_nr_components(format);
1733
1734 image->vk_format = format;
1735 image->tiling = pCreateInfo->tiling;
1736 image->usage = pCreateInfo->usage;
1737 image->flags = pCreateInfo->flags;
1738 image->plane_count = plane_count;
1739
1740 image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
1741 if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
1742 for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
1743 if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
1744 pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
1745 image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1746 else
1747 image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
1748 }
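/* queue_family_mask is a bitmask of internal queue families that may access the
 * image; EXTERNAL/FOREIGN indices widen it to every internal family. */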
1749
1750 const VkExternalMemoryImageCreateInfo *external_info =
1751 vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO);
1752
1753 image->shareable = external_info;
1754 if (!vk_format_is_depth_or_stencil(format) && !image->shareable &&
1755 !(image->flags & VK_IMAGE_CREATE_SPARSE_ALIASED_BIT) &&
1756 pCreateInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
1757 image->info.surf_index = &device->image_mrt_offset_counter;
1758 }
1759
1760 if (mod_list)
1761 modifier = radv_select_modifier(device, format, mod_list);
1762 else if (explicit_mod)
1763 modifier = explicit_mod->drmFormatModifier;
1764
1765 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
1766 image->planes[plane].surface.flags =
1767 radv_get_surface_flags(device, image, plane, pCreateInfo, format);
1768 image->planes[plane].surface.modifier = modifier;
1769 }
1770
1771 bool delay_layout =
1772 external_info && (external_info->handleTypes &
1773 VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);
1774
1775 if (delay_layout) {
1776 *pImage = radv_image_to_handle(image);
1777 assert(!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
1778 return VK_SUCCESS;
1779 }
1780
1781 VkResult result = radv_image_create_layout(device, *create_info, explicit_mod, image);
1782 if (result != VK_SUCCESS) {
1783 radv_destroy_image(device, alloc, image);
1784 return result;
1785 }
1786
1787 if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
1788 image->alignment = MAX2(image->alignment, 4096);
1789 image->size = align64(image->size, image->alignment);
1790 image->offset = 0;
1791
1792 result =
1793 device->ws->buffer_create(device->ws, image->size, image->alignment, 0,
1794 RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, 0, &image->bo);
1795 if (result != VK_SUCCESS) {
1796 radv_destroy_image(device, alloc, image);
1797 return vk_error(device, result);
1798 }
1799 }
1800
1801 if (device->instance->debug_flags & RADV_DEBUG_IMG) {
1802 radv_image_print_info(device, image);
1803 }
1804
1805 *pImage = radv_image_to_handle(image);
1806
1807 return VK_SUCCESS;
1808 }
1809
1810 static void
1811 radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_device *device,
1812 VkFormat vk_format, const VkComponentMapping *components,
1813 bool is_storage_image, bool disable_compression,
1814 bool enable_compression, unsigned plane_id,
1815 unsigned descriptor_plane_id)
1816 {
1817 struct radv_image *image = iview->image;
1818 struct radv_image_plane *plane = &image->planes[plane_id];
1819 bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
1820 uint32_t blk_w;
1821 union radv_descriptor *descriptor;
1822 uint32_t hw_level = 0;
1823
1824 if (is_storage_image) {
1825 descriptor = &iview->storage_descriptor;
1826 } else {
1827 descriptor = &iview->descriptor;
1828 }
1829
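/* Rescale the surface block width from plane-format blocks to view-format
 * texels so that the pitch programming below matches the view's format. */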
1830 assert(vk_format_get_plane_count(vk_format) == 1);
1831 assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
1832 blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) *
1833 vk_format_get_blockwidth(vk_format);
1834
1835 if (device->physical_device->rad_info.chip_class >= GFX9)
1836 hw_level = iview->base_mip;
1837 radv_make_texture_descriptor(
1838 device, image, is_storage_image, iview->type, vk_format, components, hw_level,
1839 hw_level + iview->level_count - 1, iview->base_layer,
1840 iview->base_layer + iview->layer_count - 1,
1841 vk_format_get_plane_width(image->vk_format, plane_id, iview->extent.width),
1842 vk_format_get_plane_height(image->vk_format, plane_id, iview->extent.height),
1843 iview->extent.depth, descriptor->plane_descriptors[descriptor_plane_id],
1844 descriptor_plane_id || is_storage_image ? NULL : descriptor->fmask_descriptor);
1845
1846 const struct legacy_surf_level *base_level_info = NULL;
1847 if (device->physical_device->rad_info.chip_class <= GFX9) {
1848 if (is_stencil)
1849 base_level_info = &plane->surface.u.legacy.zs.stencil_level[iview->base_mip];
1850 else
1851 base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
1852 }
1853
1854 bool enable_write_compression = radv_image_use_dcc_image_stores(device, image);
1855 if (is_storage_image && !(enable_write_compression || enable_compression))
1856 disable_compression = true;
1857 si_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, iview->base_mip,
1858 iview->base_mip, blk_w, is_stencil, is_storage_image,
1859 disable_compression, enable_write_compression,
1860 descriptor->plane_descriptors[descriptor_plane_id]);
1861 }
1862
1863 static unsigned
1864 radv_plane_from_aspect(VkImageAspectFlags mask)
1865 {
1866 switch (mask) {
1867 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1868 case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
1869 return 1;
1870 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1871 case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
1872 return 2;
1873 case VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT:
1874 return 3;
1875 default:
1876 return 0;
1877 }
1878 }
1879
1880 VkFormat
1881 radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
1882 {
1883 switch (mask) {
1884 case VK_IMAGE_ASPECT_PLANE_0_BIT:
1885 return image->planes[0].format;
1886 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1887 return image->planes[1].format;
1888 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1889 return image->planes[2].format;
1890 case VK_IMAGE_ASPECT_STENCIL_BIT:
1891 return vk_format_stencil_only(image->vk_format);
1892 case VK_IMAGE_ASPECT_DEPTH_BIT:
1893 return vk_format_depth_only(image->vk_format);
1894 case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
1895 return vk_format_depth_only(image->vk_format);
1896 default:
1897 return image->vk_format;
1898 }
1899 }
1900
1901 /**
1902 * Determine if the given image view can be fast cleared.
1903 */
1904 static bool
1905 radv_image_view_can_fast_clear(const struct radv_device *device,
1906 const struct radv_image_view *iview)
1907 {
1908 struct radv_image *image;
1909
1910 if (!iview)
1911 return false;
1912 image = iview->image;
1913
1914 /* Only fast clear if the image itself can be fast cleared. */
1915 if (!radv_image_can_fast_clear(device, image))
1916 return false;
1917
1918 /* Only fast clear if all layers are bound. */
1919 if (iview->base_layer > 0 || iview->layer_count != image->info.array_size)
1920 return false;
1921
1922 /* Only fast clear if the view covers the whole image. */
1923 if (!radv_image_extent_compare(image, &iview->extent))
1924 return false;
1925
1926 return true;
1927 }
1928
1929 void
1930 radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
1931 const VkImageViewCreateInfo *pCreateInfo,
1932 const struct radv_image_view_extra_create_info *extra_create_info)
1933 {
1934 RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
1935 const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
1936 uint32_t plane_count = 1;
1937
1938 vk_object_base_init(&device->vk, &iview->base, VK_OBJECT_TYPE_IMAGE_VIEW);
1939
1940 switch (image->type) {
1941 case VK_IMAGE_TYPE_1D:
1942 case VK_IMAGE_TYPE_2D:
1943 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <=
1944 image->info.array_size);
1945 break;
1946 case VK_IMAGE_TYPE_3D:
1947 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <=
1948 radv_minify(image->info.depth, range->baseMipLevel));
1949 break;
1950 default:
1951 unreachable("bad VkImageType");
1952 }
1953 iview->image = image;
1954 iview->type = pCreateInfo->viewType;
1955 iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
1956 iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
1957 iview->base_layer = range->baseArrayLayer;
1958 iview->layer_count = radv_get_layerCount(image, range);
1959 iview->base_mip = range->baseMipLevel;
1960 iview->level_count = radv_get_levelCount(image, range);
1961
1962 iview->vk_format = pCreateInfo->format;
1963
1964 /* If the image has an Android external format, pCreateInfo->format will be
1965 * VK_FORMAT_UNDEFINED. */
1966 if (iview->vk_format == VK_FORMAT_UNDEFINED)
1967 iview->vk_format = image->vk_format;
1968
1969 /* Split out the right aspect. Note that for internal meta code we sometimes
1970 * use an equivalent color format for the aspect, so we first have to check
1971 * whether we actually got a depth/stencil format. */
1972 if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
1973 if (vk_format_has_stencil(iview->vk_format))
1974 iview->vk_format = vk_format_stencil_only(iview->vk_format);
1975 } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
1976 if (vk_format_has_depth(iview->vk_format))
1977 iview->vk_format = vk_format_depth_only(iview->vk_format);
1978 }
1979
1980 if (device->physical_device->rad_info.chip_class >= GFX9) {
1981 iview->extent = (VkExtent3D){
1982 .width = image->info.width,
1983 .height = image->info.height,
1984 .depth = image->info.depth,
1985 };
1986 } else {
1987 iview->extent = (VkExtent3D){
1988 .width = radv_minify(image->info.width, range->baseMipLevel),
1989 .height = radv_minify(image->info.height, range->baseMipLevel),
1990 .depth = radv_minify(image->info.depth, range->baseMipLevel),
1991 };
1992 }
1993
1994 if (iview->vk_format != image->planes[iview->plane_id].format) {
1995 unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
1996 unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
1997 unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
1998 unsigned img_bh = vk_format_get_blockheight(image->vk_format);
1999
2000 iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
2001 iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);
2002
2003 /* Comment ported from amdvlk -
2004 * If we have the following image:
2005 * Uncompressed pixels Compressed block sizes (4x4)
2006 * mip0: 22 x 22 6 x 6
2007 * mip1: 11 x 11 3 x 3
2008 * mip2: 5 x 5 2 x 2
2009 * mip3: 2 x 2 1 x 1
2010 * mip4: 1 x 1 1 x 1
2011 *
2012 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and
2013 * the HW is calculating the degradation of the block sizes down the mip-chain as follows
2014 * (straight-up divide-by-two integer math): mip0: 6x6 mip1: 3x3 mip2: 1x1 mip3: 1x1
2015 *
2016 * This means that mip2 will be missing texels.
2017 *
2018 * Fix this by calculating the base mip's width and height, then convert
2019 * that, and round it back up to get the level 0 size. Clamp the
2020 * converted size between the original values, and the physical extent
2021 * of the base mipmap.
2022 *
2023 * On GFX10 we have to take care to not go over the physical extent
2024 * of the base mipmap as otherwise the GPU computes a different layout.
2025 * Note that the GPU does use the same base-mip dimensions for both a
2026 * block compatible format and the compressed format, so even if we take
2027 * the plain converted dimensions the physical layout is correct.
2028 */
2029 if (device->physical_device->rad_info.chip_class >= GFX9 &&
2030 vk_format_is_compressed(image->vk_format) && !vk_format_is_compressed(iview->vk_format)) {
2031 /* If we have multiple levels in the view we should ideally take the last level,
2032 * but the mip calculation has a max(..., 1) so walking back to the base mip in a
2033 * useful way is hard. */
2034 if (iview->level_count > 1) {
2035 iview->extent.width = iview->image->planes[0].surface.u.gfx9.base_mip_width;
2036 iview->extent.height = iview->image->planes[0].surface.u.gfx9.base_mip_height;
2037 } else {
2038 unsigned lvl_width = radv_minify(image->info.width, range->baseMipLevel);
2039 unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);
2040
2041 lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
2042 lvl_height = round_up_u32(lvl_height * view_bh, img_bh);
2043
2044 lvl_width <<= range->baseMipLevel;
2045 lvl_height <<= range->baseMipLevel;
2046
2047 iview->extent.width = CLAMP(lvl_width, iview->extent.width,
2048 iview->image->planes[0].surface.u.gfx9.base_mip_width);
2049 iview->extent.height = CLAMP(lvl_height, iview->extent.height,
2050 iview->image->planes[0].surface.u.gfx9.base_mip_height);
2051 }
2052 }
2053 }
2054
2055 iview->support_fast_clear = radv_image_view_can_fast_clear(device, iview);
2056
2057 if (vk_format_get_plane_count(image->vk_format) > 1 &&
2058 iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
2059 plane_count = vk_format_get_plane_count(iview->vk_format);
2060 }
2061
2062 bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false;
2063 bool enable_compression = extra_create_info ? extra_create_info->enable_compression : false;
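/* Each plane gets two descriptors: a sampled-image one and a storage-image one. */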
2064 for (unsigned i = 0; i < plane_count; ++i) {
2065 VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
2066 radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, false,
2067 disable_compression, enable_compression, iview->plane_id + i,
2068 i);
2069 radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, true,
2070 disable_compression, enable_compression, iview->plane_id + i,
2071 i);
2072 }
2073 }
2074
2075 void
2076 radv_image_view_finish(struct radv_image_view *iview)
2077 {
2078 vk_object_base_finish(&iview->base);
2079 }
2080
2081 bool
2082 radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image,
2083 VkImageLayout layout, bool in_render_loop, unsigned queue_mask)
2084 {
2085 switch (layout) {
2086 case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
2087 case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR:
2088 case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR:
2089 return radv_image_has_htile(image);
2090 case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
2091 return radv_image_is_tc_compat_htile(image) ||
2092 (radv_image_has_htile(image) && queue_mask == (1u << RADV_QUEUE_GENERAL));
2093 case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR:
2094 case VK_IMAGE_LAYOUT_GENERAL:
2095 /* It should be safe to enable TC-compat HTILE with
2096 * VK_IMAGE_LAYOUT_GENERAL if we are not in a render loop and
2097 * if the image doesn't have the storage bit set. This
2098 * improves performance for apps that use GENERAL for the main
2099 * depth pass because it allows compression and reduces
2100 * the number of decompressions from/to GENERAL.
2101 */
2102 /* FIXME: Enabling TC-compat HTILE in GENERAL on the compute
2103 * queue is likely broken for eg. depth/stencil copies.
2104 */
2105 if (radv_image_is_tc_compat_htile(image) && queue_mask & (1u << RADV_QUEUE_GENERAL) &&
2106 !in_render_loop && !device->instance->disable_tc_compat_htile_in_general) {
2107 return true;
2108 } else {
2109 return false;
2110 }
2111 case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
2112 if (radv_image_is_tc_compat_htile(image) ||
2113 (radv_image_has_htile(image) &&
2114 !(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))) {
2115 /* Keep HTILE compressed if the image is only going to
2116 * be used as a depth/stencil read-only attachment.
2117 */
2118 return true;
2119 } else {
2120 return false;
2121 }
2122 break;
2123 default:
2124 return radv_image_is_tc_compat_htile(image);
2125 }
2126 }
2127
2128 bool
2129 radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image,
2130 unsigned level, VkImageLayout layout, bool in_render_loop,
2131 unsigned queue_mask)
2132 {
2133 if (radv_dcc_enabled(image, level) &&
2134 !radv_layout_dcc_compressed(device, image, level, layout, in_render_loop, queue_mask))
2135 return false;
2136
2137 if (!(image->usage & RADV_IMAGE_USAGE_WRITE_BITS))
2138 return false;
2139
2140 if (layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
2141 return false;
2142
2143 /* Exclusive images with CMASK or DCC can always be fast-cleared on the gfx queue. Concurrent
2144 * images can only be fast-cleared if comp-to-single is supported because we don't yet support
2145 * FCE on the compute queue.
2146 */
2147 return queue_mask == (1u << RADV_QUEUE_GENERAL) || radv_image_use_comp_to_single(device, image);
2148 }
2149
2150 bool
2151 radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image,
2152 unsigned level, VkImageLayout layout, bool in_render_loop,
2153 unsigned queue_mask)
2154 {
2155 if (!radv_dcc_enabled(image, level))
2156 return false;
2157
2158 if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && queue_mask & (1u << RADV_QUEUE_FOREIGN))
2159 return true;
2160
2161 /* If the image is read-only, we can always just keep it compressed */
2162 if (!(image->usage & RADV_IMAGE_USAGE_WRITE_BITS))
2163 return true;
2164
2165 /* Don't compress compute transfer dst when image stores are not supported. */
2166 if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) &&
2167 (queue_mask & (1u << RADV_QUEUE_COMPUTE)) && !radv_image_use_dcc_image_stores(device, image))
2168 return false;
2169
2170 return device->physical_device->rad_info.chip_class >= GFX10 || layout != VK_IMAGE_LAYOUT_GENERAL;
2171 }
2172
2173 bool
2174 radv_layout_fmask_compressed(const struct radv_device *device, const struct radv_image *image,
2175 VkImageLayout layout, unsigned queue_mask)
2176 {
2177 if (!radv_image_has_fmask(image))
2178 return false;
2179
2180 /* Don't compress compute transfer dst because image stores ignore FMASK and it needs to be
2181 * expanded beforehand.
2182 */
2183 if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) &&
2184 (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
2185 return false;
2186
2187 /* Only compress concurrent images if TC-compat CMASK is enabled (no FMASK decompression). */
2188 return layout != VK_IMAGE_LAYOUT_GENERAL &&
2189 (queue_mask == (1u << RADV_QUEUE_GENERAL) || radv_image_is_tc_compat_cmask(image));
2190 }
2191
2192 unsigned
2193 radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
2194 {
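/* Concurrent images return the mask built at creation time; exclusive images
 * are owned by a single family, except for external/foreign transitions which
 * widen the mask to every family plus the foreign "queue". */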
2195 if (!image->exclusive)
2196 return image->queue_family_mask;
2197 if (family == VK_QUEUE_FAMILY_EXTERNAL || family == VK_QUEUE_FAMILY_FOREIGN_EXT)
2198 return ((1u << RADV_MAX_QUEUE_FAMILIES) - 1u) | (1u << RADV_QUEUE_FOREIGN);
2199 if (family == VK_QUEUE_FAMILY_IGNORED)
2200 return 1u << queue_family;
2201 return 1u << family;
2202 }
2203
2204 VkResult
2205 radv_CreateImage(VkDevice device, const VkImageCreateInfo *pCreateInfo,
2206 const VkAllocationCallbacks *pAllocator, VkImage *pImage)
2207 {
2208 #ifdef ANDROID
2209 const VkNativeBufferANDROID *gralloc_info =
2210 vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
2211
2212 if (gralloc_info)
2213 return radv_image_from_gralloc(device, pCreateInfo, gralloc_info, pAllocator, pImage);
2214 #endif
2215
2216 const struct wsi_image_create_info *wsi_info =
2217 vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
2218 bool scanout = wsi_info && wsi_info->scanout;
2219
2220 return radv_image_create(device,
2221 &(struct radv_image_create_info){
2222 .vk_info = pCreateInfo,
2223 .scanout = scanout,
2224 },
2225 pAllocator, pImage);
2226 }
2227
2228 void
2229 radv_DestroyImage(VkDevice _device, VkImage _image, const VkAllocationCallbacks *pAllocator)
2230 {
2231 RADV_FROM_HANDLE(radv_device, device, _device);
2232 RADV_FROM_HANDLE(radv_image, image, _image);
2233
2234 if (!image)
2235 return;
2236
2237 radv_destroy_image(device, pAllocator, image);
2238 }
2239
2240 void
2241 radv_GetImageSubresourceLayout(VkDevice _device, VkImage _image,
2242 const VkImageSubresource *pSubresource, VkSubresourceLayout *pLayout)
2243 {
2244 RADV_FROM_HANDLE(radv_image, image, _image);
2245 RADV_FROM_HANDLE(radv_device, device, _device);
2246 int level = pSubresource->mipLevel;
2247 int layer = pSubresource->arrayLayer;
2248
2249 unsigned plane_id = 0;
2250 if (vk_format_get_plane_count(image->vk_format) > 1)
2251 plane_id = radv_plane_from_aspect(pSubresource->aspectMask);
2252
2253 struct radv_image_plane *plane = &image->planes[plane_id];
2254 struct radeon_surf *surface = &plane->surface;
2255
2256 if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
2257 unsigned mem_plane_id = radv_plane_from_aspect(pSubresource->aspectMask);
2258
2259 assert(level == 0);
2260 assert(layer == 0);
2261
2262 pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
2263 surface, mem_plane_id, 0);
2264 pLayout->rowPitch = ac_surface_get_plane_stride(device->physical_device->rad_info.chip_class,
2265 surface, mem_plane_id);
2266 pLayout->arrayPitch = 0;
2267 pLayout->depthPitch = 0;
2268 pLayout->size = ac_surface_get_plane_size(surface, mem_plane_id);
2269 } else if (device->physical_device->rad_info.chip_class >= GFX9) {
2270 uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;
2271
2272 pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
2273 &plane->surface, 0, layer) +
2274 level_offset;
2275 if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
2276 image->vk_format == VK_FORMAT_R32G32B32_SINT ||
2277 image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
2278 /* Adjust the number of bytes between each row because
2279 * the pitch is actually the number of components per
2280 * row.
2281 */
2282 pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
2283 } else {
2284 uint32_t pitch =
2285 surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;
2286
2287 assert(util_is_power_of_two_nonzero(surface->bpe));
2288 pLayout->rowPitch = pitch * surface->bpe;
2289 }
2290
2291 pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
2292 pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
2293 pLayout->size = surface->u.gfx9.surf_slice_size;
2294 if (image->type == VK_IMAGE_TYPE_3D)
2295 pLayout->size *= u_minify(image->info.depth, level);
2296 } else {
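/* Legacy (GFX8 and older) surfaces store level offsets in 256-byte units and
 * slice sizes in dwords, hence the scaling below. */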
2297 pLayout->offset = (uint64_t)surface->u.legacy.level[level].offset_256B * 256 +
2298 (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
2299 pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
2300 pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
2301 pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
2302 pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
2303 if (image->type == VK_IMAGE_TYPE_3D)
2304 pLayout->size *= u_minify(image->info.depth, level);
2305 }
2306 }
2307
2308 VkResult
2309 radv_GetImageDrmFormatModifierPropertiesEXT(VkDevice _device, VkImage _image,
2310 VkImageDrmFormatModifierPropertiesEXT *pProperties)
2311 {
2312 RADV_FROM_HANDLE(radv_image, image, _image);
2313
2314 pProperties->drmFormatModifier = image->planes[0].surface.modifier;
2315 return VK_SUCCESS;
2316 }
2317
2318 VkResult
2319 radv_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo,
2320 const VkAllocationCallbacks *pAllocator, VkImageView *pView)
2321 {
2322 RADV_FROM_HANDLE(radv_device, device, _device);
2323 struct radv_image_view *view;
2324
2325 view =
2326 vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2327 if (view == NULL)
2328 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2329
2330 radv_image_view_init(view, device, pCreateInfo, NULL);
2331
2332 *pView = radv_image_view_to_handle(view);
2333
2334 return VK_SUCCESS;
2335 }
2336
2337 void
2338 radv_DestroyImageView(VkDevice _device, VkImageView _iview, const VkAllocationCallbacks *pAllocator)
2339 {
2340 RADV_FROM_HANDLE(radv_device, device, _device);
2341 RADV_FROM_HANDLE(radv_image_view, iview, _iview);
2342
2343 if (!iview)
2344 return;
2345
2346 radv_image_view_finish(iview);
2347 vk_free2(&device->vk.alloc, pAllocator, iview);
2348 }
2349
2350 void
2351 radv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device,
2352 const VkBufferViewCreateInfo *pCreateInfo)
2353 {
2354 RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
2355
2356 vk_object_base_init(&device->vk, &view->base, VK_OBJECT_TYPE_BUFFER_VIEW);
2357
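/* VK_WHOLE_SIZE makes the view span from the given offset to the end of the buffer. */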
2358 view->bo = buffer->bo;
2359 view->range =
2360 pCreateInfo->range == VK_WHOLE_SIZE ? buffer->size - pCreateInfo->offset : pCreateInfo->range;
2361 view->vk_format = pCreateInfo->format;
2362
2363 radv_make_buffer_descriptor(device, buffer, view->vk_format, pCreateInfo->offset, view->range,
2364 view->state);
2365 }
2366
2367 void
2368 radv_buffer_view_finish(struct radv_buffer_view *view)
2369 {
2370 vk_object_base_finish(&view->base);
2371 }
2372
2373 VkResult
2374 radv_CreateBufferView(VkDevice _device, const VkBufferViewCreateInfo *pCreateInfo,
2375 const VkAllocationCallbacks *pAllocator, VkBufferView *pView)
2376 {
2377 RADV_FROM_HANDLE(radv_device, device, _device);
2378 struct radv_buffer_view *view;
2379
2380 view =
2381 vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2382 if (!view)
2383 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2384
2385 radv_buffer_view_init(view, device, pCreateInfo);
2386
2387 *pView = radv_buffer_view_to_handle(view);
2388
2389 return VK_SUCCESS;
2390 }
2391
2392 void
2393 radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
2394 const VkAllocationCallbacks *pAllocator)
2395 {
2396 RADV_FROM_HANDLE(radv_device, device, _device);
2397 RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);
2398
2399 if (!view)
2400 return;
2401
2402 radv_buffer_view_finish(view);
2403 vk_free2(&device->vk.alloc, pAllocator, view);
2404 }
2405