1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "vk_format.h"
31 #include "vk_util.h"
32 #include "radv_radeon_winsys.h"
33 #include "sid.h"
34 #include "util/debug.h"
35 #include "util/u_atomic.h"
36 #include "vulkan/util/vk_format.h"
37
38 #include "gfx10_format_table.h"
39
40 static unsigned
radv_choose_tiling(struct radv_device * device,const VkImageCreateInfo * pCreateInfo,VkFormat format)41 radv_choose_tiling(struct radv_device *device,
42 const VkImageCreateInfo *pCreateInfo,
43 VkFormat format)
44 {
45 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
46 assert(pCreateInfo->samples <= 1);
47 return RADEON_SURF_MODE_LINEAR_ALIGNED;
48 }
49
50 if (!vk_format_is_compressed(format) &&
51 !vk_format_is_depth_or_stencil(format)
52 && device->physical_device->rad_info.chip_class <= GFX8) {
53 /* this causes hangs in some VK CTS tests on GFX9. */
54 /* Textures with a very small height are recommended to be linear. */
55 if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
56 /* Only very thin and long 2D textures should benefit from
57 * linear_aligned. */
58 (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
59 return RADEON_SURF_MODE_LINEAR_ALIGNED;
60 }
61
62 /* MSAA resources must be 2D tiled. */
63 if (pCreateInfo->samples > 1)
64 return RADEON_SURF_MODE_2D;
65
66 return RADEON_SURF_MODE_2D;
67 }
68
69 static bool
radv_use_tc_compat_htile_for_image(struct radv_device * device,const VkImageCreateInfo * pCreateInfo,VkFormat format)70 radv_use_tc_compat_htile_for_image(struct radv_device *device,
71 const VkImageCreateInfo *pCreateInfo,
72 VkFormat format)
73 {
74 /* TC-compat HTILE is only available for GFX8+. */
75 if (device->physical_device->rad_info.chip_class < GFX8)
76 return false;
77
78 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
79 return false;
80
81 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
82 return false;
83
84 if (pCreateInfo->mipLevels > 1)
85 return false;
86
87 /* Do not enable TC-compatible HTILE if the image isn't readable by a
88 * shader because no texture fetches will happen.
89 */
90 if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
91 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
92 VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
93 return false;
94
95 /* FIXME: for some reason TC compat with 2/4/8 samples breaks some cts
96 * tests - disable for now. On GFX10 D32_SFLOAT is affected as well.
97 */
98 if (pCreateInfo->samples >= 2 &&
99 (format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
100 (format == VK_FORMAT_D32_SFLOAT &&
101 device->physical_device->rad_info.chip_class >= GFX10)))
102 return false;
103
104 /* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
105 * supports 32-bit. Though, it's possible to enable TC-compat for
106 * 16-bit depth surfaces if no Z planes are compressed.
107 */
108 if (format != VK_FORMAT_D32_SFLOAT_S8_UINT &&
109 format != VK_FORMAT_D32_SFLOAT &&
110 format != VK_FORMAT_D16_UNORM)
111 return false;
112
113 if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
114 const struct VkImageFormatListCreateInfo *format_list =
115 (const struct VkImageFormatListCreateInfo *)
116 vk_find_struct_const(pCreateInfo->pNext,
117 IMAGE_FORMAT_LIST_CREATE_INFO);
118
119 /* We have to ignore the existence of the list if viewFormatCount = 0 */
120 if (format_list && format_list->viewFormatCount) {
121 /* compatibility is transitive, so we only need to check
122 * one format with everything else.
123 */
124 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
125 if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
126 continue;
127
128 if (format != format_list->pViewFormats[i])
129 return false;
130 }
131 } else {
132 return false;
133 }
134 }
135
136 return true;
137 }
138
139 static bool
radv_surface_has_scanout(struct radv_device * device,const struct radv_image_create_info * info)140 radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
141 {
142 if (info->bo_metadata) {
143 if (device->physical_device->rad_info.chip_class >= GFX9)
144 return info->bo_metadata->u.gfx9.scanout;
145 else
146 return info->bo_metadata->u.legacy.scanout;
147 }
148
149 return info->scanout;
150 }
151
152 static bool
radv_image_use_fast_clear_for_image(const struct radv_device * device,const struct radv_image * image)153 radv_image_use_fast_clear_for_image(const struct radv_device *device,
154 const struct radv_image *image)
155 {
156 if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
157 return true;
158
159 if (image->info.samples <= 1 &&
160 image->info.width * image->info.height <= 512 * 512) {
161 /* Do not enable CMASK or DCC for small surfaces where the cost
162 * of the eliminate pass can be higher than the benefit of fast
163 * clear. RadeonSI does this, but the image threshold is
164 * different.
165 */
166 return false;
167 }
168
169 return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
170 (image->exclusive || image->queue_family_mask == 1);
171 }
172
/* Decide whether DCC (Delta Color Compression) should be enabled for a
 * color image. Returns false as soon as any hardware, debug-flag, usage
 * or format-compatibility constraint rules DCC out.
 *
 * The checks are ordered cheapest-first; the format-list walk at the end
 * only runs for VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT images.
 */
static bool
radv_use_dcc_for_image(struct radv_device *device,
		       const struct radv_image *image,
		       const VkImageCreateInfo *pCreateInfo,
		       VkFormat format)
{
	bool dcc_compatible_formats;
	bool blendable;

	/* DCC (Delta Color Compression) is only available for GFX8+. */
	if (device->physical_device->rad_info.chip_class < GFX8)
		return false;

	if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
		return false;

	/* Shared images may be accessed by other processes that do not
	 * know about the compression metadata. */
	if (image->shareable)
		return false;

	/* TODO: Enable DCC for storage images. */
	if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
		return false;

	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
		return false;

	/* Multi-planar and subsampled (YCbCr-style) formats do not get DCC. */
	if (vk_format_is_subsampled(format) ||
	    vk_format_get_plane_count(format) > 1)
		return false;

	/* DCC only pays off together with fast clears. */
	if (!radv_image_use_fast_clear_for_image(device, image))
		return false;

	/* TODO: Enable DCC for mipmaps on GFX9+. */
	if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
	    device->physical_device->rad_info.chip_class >= GFX9)
		return false;

	/* Do not enable DCC for mipmapped arrays because performance is worse. */
	if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
		return false;

	/* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
	 * 2x can be enabled with an option.
	 */
	if (pCreateInfo->samples > 2 ||
	    (pCreateInfo->samples == 2 &&
	     !device->physical_device->dcc_msaa_allowed))
		return false;

	/* Determine if the formats are DCC compatible. */
	dcc_compatible_formats =
		radv_is_colorbuffer_format_supported(device->physical_device,
						     format, &blendable);

	if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
		const struct VkImageFormatListCreateInfo *format_list =
			(const struct  VkImageFormatListCreateInfo *)
				vk_find_struct_const(pCreateInfo->pNext,
						     IMAGE_FORMAT_LIST_CREATE_INFO);

		/* We have to ignore the existence of the list if viewFormatCount = 0 */
		if (format_list && format_list->viewFormatCount) {
			/* compatibility is transitive, so we only need to check
			 * one format with everything else. */
			for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
				if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
					continue;

				if (!radv_dcc_formats_compatible(format,
				                                 format_list->pViewFormats[i]))
					dcc_compatible_formats = false;
			}
		} else {
			/* Without a view-format list any compatible format could
			 * be used, so DCC must be disabled. */
			dcc_compatible_formats = false;
		}
	}

	if (!dcc_compatible_formats)
		return false;

	return true;
}
256
257 static inline bool
radv_use_fmask_for_image(const struct radv_device * device,const struct radv_image * image)258 radv_use_fmask_for_image(const struct radv_device *device,
259 const struct radv_image *image)
260 {
261 return image->info.samples > 1 &&
262 ((image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
263 (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
264 }
265
266 static inline bool
radv_use_htile_for_image(const struct radv_device * device,const struct radv_image * image)267 radv_use_htile_for_image(const struct radv_device *device,
268 const struct radv_image *image)
269 {
270 return image->info.levels == 1 &&
271 !image->shareable &&
272 ((image->info.width * image->info.height >= 8 * 8) ||
273 (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
274 }
275
276 static bool
radv_use_tc_compat_cmask_for_image(struct radv_device * device,struct radv_image * image)277 radv_use_tc_compat_cmask_for_image(struct radv_device *device,
278 struct radv_image *image)
279 {
280 if (!(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
281 return false;
282
283 /* TC-compat CMASK is only available for GFX8+. */
284 if (device->physical_device->rad_info.chip_class < GFX8)
285 return false;
286
287 if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
288 return false;
289
290 if (radv_image_has_dcc(image))
291 return false;
292
293 if (!radv_image_has_cmask(image))
294 return false;
295
296 return true;
297 }
298
/* Word 1 of the opaque BO metadata: ATI vendor ID in the high 16 bits,
 * PCI device ID of this GPU in the low 16 bits. Used both when writing
 * metadata and when validating imported metadata.
 */
static uint32_t si_get_bo_metadata_word1(const struct radv_device *device)
{
	return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
}
303
304 static bool
radv_is_valid_opaque_metadata(const struct radv_device * device,const struct radeon_bo_metadata * md)305 radv_is_valid_opaque_metadata(const struct radv_device *device,
306 const struct radeon_bo_metadata *md)
307 {
308 if (md->metadata[0] != 1 ||
309 md->metadata[1] != si_get_bo_metadata_word1(device))
310 return false;
311
312 if (md->size_metadata < 40)
313 return false;
314
315 return true;
316 }
317
/* Overwrite the surface's tiling mode (and legacy tiling parameters)
 * with the values recorded in imported BO metadata, so the surface
 * layout matches what the exporting process allocated.
 */
static void
radv_patch_surface_from_metadata(struct radv_device *device,
				 struct radeon_surf *surface,
				 const struct radeon_bo_metadata *md)
{
	/* Clear the MODE bits before setting them from the metadata. */
	surface->flags = RADEON_SURF_CLR(surface->flags, MODE);

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* On GFX9+ a single swizzle mode encodes the tiling;
		 * mode 0 is linear. */
		if (md->u.gfx9.swizzle_mode > 0)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

		surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
	} else {
		/* Pre-GFX9: copy the individual legacy tiling parameters. */
		surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
		surface->u.legacy.bankw = md->u.legacy.bankw;
		surface->u.legacy.bankh = md->u.legacy.bankh;
		surface->u.legacy.tile_split = md->u.legacy.tile_split;
		surface->u.legacy.mtilea = md->u.legacy.mtilea;
		surface->u.legacy.num_banks = md->u.legacy.num_banks;

		/* Macro tiling implies 2D mode, micro tiling implies 1D. */
		if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
		else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

	}
}
349
/* Reconcile the image dimensions requested at creation with the
 * dimensions encoded in imported BO metadata, widening image_info when
 * the external allocation is larger. Returns
 * VK_ERROR_INVALID_EXTERNAL_HANDLE when the import cannot work (smaller
 * external image, or any width mismatch on GFX10).
 */
static VkResult
radv_patch_image_dimensions(struct radv_device *device,
                            struct radv_image *image,
                            const struct radv_image_create_info *create_info,
                            struct ac_surf_info *image_info)
{
	unsigned width = image->info.width;
	unsigned height = image->info.height;

	/*
	 * minigbm sometimes allocates bigger images which is going to result in
	 * weird strides and other properties. Lets be lenient where possible and
	 * fail it on GFX10 (as we cannot cope there).
	 *
	 * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
	 */
	if (create_info->bo_metadata &&
	    radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
		const struct radeon_bo_metadata *md = create_info->bo_metadata;

		/* Decode width/height from the image descriptor words stored
		 * in the metadata; the field layout differs per generation. */
		if (device->physical_device->rad_info.chip_class >= GFX10) {
			width = G_00A004_WIDTH_LO(md->metadata[3]) +
			        (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
			height = G_00A008_HEIGHT(md->metadata[4]) + 1;
		} else {
			width = G_008F18_WIDTH(md->metadata[4]) + 1;
			height = G_008F18_HEIGHT(md->metadata[4]) + 1;
		}
	}

	if (image->info.width == width && image->info.height == height)
		return VK_SUCCESS;

	if (width < image->info.width || height < image->info.height) {
		fprintf(stderr,
			"The imported image has smaller dimensions than the internal\n"
			"dimensions. Using it is going to fail badly, so we reject\n"
			"this import.\n"
			"(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
			image->info.width, image->info.height, width, height);
		return VK_ERROR_INVALID_EXTERNAL_HANDLE;
	} else if (device->physical_device->rad_info.chip_class >= GFX10) {
		fprintf(stderr,
			"Tried to import an image with inconsistent width on GFX10.\n"
			"As GFX10 has no separate stride fields we cannot cope with\n"
			"an inconsistency in width and will fail this import.\n"
			"(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
			image->info.width, image->info.height, width, height);
		return VK_ERROR_INVALID_EXTERNAL_HANDLE;
	} else {
		/* Pre-GFX10: warn but accept, adopting the external dimensions. */
		fprintf(stderr,
			"Tried to import an image with inconsistent width on pre-GFX10.\n"
			"As GFX10 has no separate stride fields we cannot cope with\n"
			"an inconsistency and would fail on GFX10.\n"
			"(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
			image->info.width, image->info.height, width, height);
	}
	image_info->width = width;
	image_info->height = height;

	return VK_SUCCESS;
}
412
413 static VkResult
radv_patch_image_from_extra_info(struct radv_device * device,struct radv_image * image,const struct radv_image_create_info * create_info,struct ac_surf_info * image_info)414 radv_patch_image_from_extra_info(struct radv_device *device,
415 struct radv_image *image,
416 const struct radv_image_create_info *create_info,
417 struct ac_surf_info *image_info)
418 {
419 VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
420 if (result != VK_SUCCESS)
421 return result;
422
423 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
424 if (create_info->bo_metadata) {
425 radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
426 create_info->bo_metadata);
427 }
428
429 if (radv_surface_has_scanout(device, create_info)) {
430 image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
431 image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
432
433 image->info.surf_index = NULL;
434 }
435 }
436 return VK_SUCCESS;
437 }
438
/* Assemble the radeon_surf flags for one plane of an image: tiling
 * mode, surface type, depth/stencil bits, and the HTILE/DCC/FMASK
 * enable/disable decisions.
 */
static uint32_t
radv_get_surface_flags(struct radv_device *device,
                       const struct radv_image *image,
                       unsigned plane_id,
                       const VkImageCreateInfo *pCreateInfo,
                       VkFormat image_format)
{
	uint32_t flags;
	unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
	VkFormat format = vk_format_get_plane_format(image_format, plane_id);
	const struct vk_format_description *desc = vk_format_description(format);
	bool is_depth, is_stencil;

	is_depth = vk_format_has_depth(desc);
	is_stencil = vk_format_has_stencil(desc);


	flags = RADEON_SURF_SET(array_mode, MODE);

	/* Map the Vulkan image type (plus layer count) to a surface type. */
	switch (pCreateInfo->imageType){
	case VK_IMAGE_TYPE_1D:
		if (pCreateInfo->arrayLayers > 1)
			flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
		else
			flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
		break;
	case VK_IMAGE_TYPE_2D:
		if (pCreateInfo->arrayLayers > 1)
			flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
		else
			flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
		break;
	case VK_IMAGE_TYPE_3D:
		flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
		break;
	default:
		unreachable("unhandled image type");
	}

	/* Required for clearing/initializing a specific layer on GFX8. */
	flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;

	if (is_depth) {
		flags |= RADEON_SURF_ZBUFFER;
		/* HTILE is skipped when not beneficial or disabled via debug flag. */
		if (!radv_use_htile_for_image(device, image) ||
		    (device->instance->debug_flags & RADV_DEBUG_NO_HIZ))
			flags |= RADEON_SURF_NO_HTILE;
		if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
			flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
	}

	if (is_stencil)
		flags |= RADEON_SURF_SBUFFER;

	/* NOTE(review): 128-bit compressed 3D images on GFX9+ cannot be
	 * render targets — presumably a hardware limitation; verify against
	 * the surface allocator. */
	if (device->physical_device->rad_info.chip_class >= GFX9 &&
	    pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
	    vk_format_get_blocksizebits(image_format) == 128 &&
	    vk_format_is_compressed(image_format))
		flags |= RADEON_SURF_NO_RENDER_TARGET;

	if (!radv_use_dcc_for_image(device, image, pCreateInfo, image_format))
		flags |= RADEON_SURF_DISABLE_DCC;

	if (!radv_use_fmask_for_image(device, image))
		flags |= RADEON_SURF_NO_FMASK;

	return flags;
}
507
508 static inline unsigned
si_tile_mode_index(const struct radv_image_plane * plane,unsigned level,bool stencil)509 si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
510 {
511 if (stencil)
512 return plane->surface.u.legacy.stencil_tiling_index[level];
513 else
514 return plane->surface.u.legacy.tiling_index[level];
515 }
516
radv_map_swizzle(unsigned swizzle)517 static unsigned radv_map_swizzle(unsigned swizzle)
518 {
519 switch (swizzle) {
520 case VK_SWIZZLE_Y:
521 return V_008F0C_SQ_SEL_Y;
522 case VK_SWIZZLE_Z:
523 return V_008F0C_SQ_SEL_Z;
524 case VK_SWIZZLE_W:
525 return V_008F0C_SQ_SEL_W;
526 case VK_SWIZZLE_0:
527 return V_008F0C_SQ_SEL_0;
528 case VK_SWIZZLE_1:
529 return V_008F0C_SQ_SEL_1;
530 default: /* VK_SWIZZLE_X */
531 return V_008F0C_SQ_SEL_X;
532 }
533 }
534
/* Compute the final 4-channel swizzle for a descriptor by composing the
 * format's intrinsic swizzle with an optional VkComponentMapping.
 * Branch order matters: 64-bit formats get a fixed swizzle, a NULL
 * mapping copies the format swizzle, and depth/stencil formats compose
 * against an all-X base.
 */
static void
radv_compose_swizzle(const struct vk_format_description *desc,
		     const VkComponentMapping *mapping, enum vk_swizzle swizzle[4])
{
	if (desc->format == VK_FORMAT_R64_UINT || desc->format == VK_FORMAT_R64_SINT) {
		/* 64-bit formats only support storage images and storage images
		 * require identity component mappings. We use 32-bit
		 * instructions to access 64-bit images, so we need a special
		 * case here.
		 *
		 * The zw components are 1,0 so that they can be easily be used
		 * by loads to create the w component, which has to be 0 for
		 * NULL descriptors.
		 */
		swizzle[0] = VK_SWIZZLE_X;
		swizzle[1] = VK_SWIZZLE_Y;
		swizzle[2] = VK_SWIZZLE_1;
		swizzle[3] = VK_SWIZZLE_0;
	} else if (!mapping) {
		/* No component mapping: use the format's own swizzle as-is. */
		for (unsigned i = 0; i < 4; i++)
			swizzle[i] = desc->swizzle[i];
	} else if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
		/* Depth/stencil: broadcast the X channel before applying the
		 * component mapping. */
		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
		vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
	} else {
		vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
	}
}
563
/* Fill a 4-dword typed buffer descriptor (state[0..3]) for the given
 * buffer range and format. The address, stride, record count and
 * destination swizzles are always written; the format encoding differs
 * between GFX10+ (unified img_format) and older chips (num/data format).
 */
static void
radv_make_buffer_descriptor(struct radv_device *device,
			    struct radv_buffer *buffer,
			    VkFormat vk_format,
			    unsigned offset,
			    unsigned range,
			    uint32_t *state)
{
	const struct vk_format_description *desc;
	unsigned stride;
	uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
	uint64_t va = gpu_address + buffer->offset;
	unsigned num_format, data_format;
	int first_non_void;
	enum vk_swizzle swizzle[4];
	desc = vk_format_description(vk_format);
	first_non_void = vk_format_get_first_non_void_channel(vk_format);
	stride = desc->block.bits / 8;

	radv_compose_swizzle(desc, NULL, swizzle);

	va += offset;
	state[0] = va;
	state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
		S_008F04_STRIDE(stride);

	/* NOTE(review): NUM_RECORDS appears to be in stride units on all
	 * chips except GFX8, where the byte count is kept — confirm against
	 * the buffer-descriptor docs for each generation. */
	if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
		range /= stride;
	}

	state[2] = range;
	state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
		   S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
		   S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
		   S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3]));

	if (device->physical_device->rad_info.chip_class >= GFX10) {
		const struct gfx10_format *fmt = &gfx10_format_table[vk_format_to_pipe_format(vk_format)];

		/* OOB_SELECT chooses the out-of-bounds check:
		 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
		 * - 1: index >= NUM_RECORDS
		 * - 2: NUM_RECORDS == 0
		 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
		 *      else: swizzle_address >= NUM_RECORDS
		 */
		state[3] |= S_008F0C_FORMAT(fmt->img_format) |
			    S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
			    S_008F0C_RESOURCE_LEVEL(1);
	} else {
		num_format = radv_translate_buffer_numformat(desc, first_non_void);
		data_format = radv_translate_buffer_dataformat(desc, first_non_void);

		assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
		assert(num_format != ~0);

		state[3] |= S_008F0C_NUM_FORMAT(num_format) |
			    S_008F0C_DATA_FORMAT(data_format);
	}
}
624
/* Patch the layout-dependent ("mutable") words of an already-built
 * texture descriptor: base address, tile swizzle, swizzle/tiling mode,
 * pitch, and the DCC/HTILE compression metadata address. The encoding
 * of these fields differs per generation (GFX10+, GFX9, GFX6-8).
 *
 * NOTE(review): is_storage_image is currently unused here — presumably
 * kept for signature parity with callers; confirm before removing.
 */
static void
si_set_mutable_tex_desc_fields(struct radv_device *device,
			       struct radv_image *image,
			       const struct legacy_surf_level *base_level_info,
			       unsigned plane_id,
			       unsigned base_level, unsigned first_level,
			       unsigned block_width, bool is_stencil,
			       bool is_storage_image, bool disable_compression,
			       uint32_t *state)
{
	struct radv_image_plane *plane = &image->planes[plane_id];
	uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
	uint64_t va = gpu_address + plane->offset;
	enum chip_class chip_class = device->physical_device->rad_info.chip_class;
	uint64_t meta_va = 0;
	/* GFX9+ stores per-aspect offsets on the surface; older chips use
	 * the per-level offset. */
	if (chip_class >= GFX9) {
		if (is_stencil)
			va += plane->surface.u.gfx9.stencil_offset;
		else
			va += plane->surface.u.gfx9.surf_offset;
	} else
		va += base_level_info->offset;

	state[0] = va >> 8;
	if (chip_class >= GFX9 ||
	    base_level_info->mode == RADEON_SURF_MODE_2D)
		state[0] |= plane->surface.tile_swizzle;
	state[1] &= C_008F14_BASE_ADDRESS_HI;
	state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

	if (chip_class >= GFX8) {
		state[6] &= C_008F28_COMPRESSION_EN;
		state[7] = 0;
		/* Point the descriptor at DCC metadata if enabled, otherwise
		 * at HTILE for TC-compatible depth images. */
		if (!disable_compression && radv_dcc_enabled(image, first_level)) {
			meta_va = gpu_address + plane->surface.dcc_offset;
			if (chip_class <= GFX8)
				meta_va += base_level_info->dcc_offset;

			unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
			dcc_tile_swizzle &= plane->surface.dcc_alignment - 1;
			meta_va |= dcc_tile_swizzle;
		} else if (!disable_compression &&
			   radv_image_is_tc_compat_htile(image)) {
			meta_va = gpu_address + plane->surface.htile_offset;
		}

		if (meta_va) {
			state[6] |= S_008F28_COMPRESSION_EN(1);
			if (chip_class <= GFX9)
				state[7] = meta_va >> 8;
		}
	}

	if (chip_class >= GFX10) {
		state[3] &= C_00A00C_SW_MODE;

		if (is_stencil) {
			state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
		} else {
			state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
		}

		state[6] &= C_00A018_META_DATA_ADDRESS_LO &
			    C_00A018_META_PIPE_ALIGNED;

		if (meta_va) {
			struct gfx9_surf_meta_flags meta = {
				.rb_aligned = 1,
				.pipe_aligned = 1,
			};

			if (plane->surface.dcc_offset)
				meta = plane->surface.u.gfx9.dcc;

			state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
				    S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
		}

		state[7] = meta_va >> 16;
	} else if (chip_class == GFX9) {
		state[3] &= C_008F1C_SW_MODE;
		state[4] &= C_008F20_PITCH;

		if (is_stencil) {
			state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
			state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
		} else {
			state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
			state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
		}

		state[5] &= C_008F24_META_DATA_ADDRESS &
			    C_008F24_META_PIPE_ALIGNED &
			    C_008F24_META_RB_ALIGNED;
		if (meta_va) {
			struct gfx9_surf_meta_flags meta = {
				.rb_aligned = 1,
				.pipe_aligned = 1,
			};

			if (plane->surface.dcc_offset)
				meta = plane->surface.u.gfx9.dcc;

			state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
				    S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
				    S_008F24_META_RB_ALIGNED(meta.rb_aligned);
		}
	} else {
		/* GFX6-GFX8 */
		unsigned pitch = base_level_info->nblk_x * block_width;
		unsigned index = si_tile_mode_index(plane, base_level, is_stencil);

		state[3] &= C_008F1C_TILING_INDEX;
		state[3] |= S_008F1C_TILING_INDEX(index);
		state[4] &= C_008F20_PITCH;
		state[4] |= S_008F20_PITCH(pitch - 1);
	}
}
743
radv_tex_dim(VkImageType image_type,VkImageViewType view_type,unsigned nr_layers,unsigned nr_samples,bool is_storage_image,bool gfx9)744 static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
745 unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
746 {
747 if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
748 return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
749
750 /* GFX9 allocates 1D textures as 2D. */
751 if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
752 image_type = VK_IMAGE_TYPE_2D;
753 switch (image_type) {
754 case VK_IMAGE_TYPE_1D:
755 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
756 case VK_IMAGE_TYPE_2D:
757 if (nr_samples > 1)
758 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
759 else
760 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
761 case VK_IMAGE_TYPE_3D:
762 if (view_type == VK_IMAGE_VIEW_TYPE_3D)
763 return V_008F1C_SQ_RSRC_IMG_3D;
764 else
765 return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
766 default:
767 unreachable("illegal image type");
768 }
769 }
770
gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])771 static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
772 {
773 unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
774
775 if (swizzle[3] == VK_SWIZZLE_X) {
776 /* For the pre-defined border color values (white, opaque
777 * black, transparent black), the only thing that matters is
778 * that the alpha channel winds up in the correct place
779 * (because the RGB channels are all the same) so either of
780 * these enumerations will work.
781 */
782 if (swizzle[2] == VK_SWIZZLE_Y)
783 bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
784 else
785 bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
786 } else if (swizzle[0] == VK_SWIZZLE_X) {
787 if (swizzle[1] == VK_SWIZZLE_Y)
788 bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
789 else
790 bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
791 } else if (swizzle[1] == VK_SWIZZLE_X) {
792 bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
793 } else if (swizzle[2] == VK_SWIZZLE_X) {
794 bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
795 }
796
797 return bc_swizzle;
798 }
799
vi_alpha_is_on_msb(struct radv_device * device,VkFormat format)800 bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
801 {
802 const struct vk_format_description *desc = vk_format_description(format);
803
804 if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
805 return desc->swizzle[3] == VK_SWIZZLE_X;
806
807 return radv_translate_colorswap(format, false) <= 1;
808 }
809 /**
810 * Build the sampler view descriptor for a texture (GFX10).
811 */
812 static void
/* Build the 8-dword GFX10+ image (sampler view) descriptor in state[0..7]
 * and, when fmask_state is non-NULL, the companion FMASK descriptor used
 * for compressed MSAA fetches.  The base-address dwords are left zero here;
 * they are filled in later by si_set_mutable_tex_desc_fields().
 */
gfx10_make_texture_descriptor(struct radv_device * device,struct radv_image * image,bool is_storage_image,VkImageViewType view_type,VkFormat vk_format,const VkComponentMapping * mapping,unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer,unsigned width,unsigned height,unsigned depth,uint32_t * state,uint32_t * fmask_state)813 gfx10_make_texture_descriptor(struct radv_device *device,
814 struct radv_image *image,
815 bool is_storage_image,
816 VkImageViewType view_type,
817 VkFormat vk_format,
818 const VkComponentMapping *mapping,
819 unsigned first_level, unsigned last_level,
820 unsigned first_layer, unsigned last_layer,
821 unsigned width, unsigned height, unsigned depth,
822 uint32_t *state,
823 uint32_t *fmask_state)
824 {
825 const struct vk_format_description *desc;
826 enum vk_swizzle swizzle[4];
827 unsigned img_format;
828 unsigned type;
829
830 desc = vk_format_description(vk_format);
/* GFX10 uses a single combined hw format field, looked up from a table. */
831 img_format = gfx10_format_table[vk_format_to_pipe_format(vk_format)].img_format;
832
833 radv_compose_swizzle(desc, mapping, swizzle);
834
/* Hardware resource type: 1D/2D/3D/cube plus array and MSAA variants. */
835 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
836 is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
837 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
/* 1D arrays store the layer count in DEPTH and use a dummy height. */
838 height = 1;
839 depth = image->info.array_size;
840 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
841 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
842 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
843 depth = image->info.array_size;
844 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
/* Cube (array) views count whole cubes: 6 faces per cube. */
845 depth = image->info.array_size / 6;
846
/* state[0] (base address low bits) is patched in by the mutable-fields helper. */
847 state[0] = 0;
848 state[1] = S_00A004_FORMAT(img_format) |
849 S_00A004_WIDTH_LO(width - 1);
850 state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
851 S_00A008_HEIGHT(height - 1) |
852 S_00A008_RESOURCE_LEVEL(1);
/* For MSAA resources BASE_LEVEL/LAST_LEVEL carry 0/log2(samples)
 * instead of a mip range. */
853 state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
854 S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
855 S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
856 S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
857 S_00A00C_BASE_LEVEL(image->info.samples > 1 ?
858 0 : first_level) |
859 S_00A00C_LAST_LEVEL(image->info.samples > 1 ?
860 util_logbase2(image->info.samples) :
861 last_level) |
862 S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) |
863 S_00A00C_TYPE(type);
864 /* Depth is the last accessible layer on gfx9+. The hw doesn't need
865 * to know the total number of layers.
866 */
867 state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
868 S_00A010_BASE_ARRAY(first_layer);
869 state[5] = S_00A014_ARRAY_PITCH(0) |
870 S_00A014_MAX_MIP(image->info.samples > 1 ?
871 util_logbase2(image->info.samples) :
872 image->info.levels - 1) |
873 S_00A014_PERF_MOD(4);
874 state[6] = 0;
875 state[7] = 0;
876
/* DCC block-size limits and alpha location, only when DCC covers first_level. */
877 if (radv_dcc_enabled(image, first_level)) {
878 state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
879 S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) |
880 S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
881 }
882
883 /* Initialize the sampler view for FMASK. */
884 if (fmask_state) {
885 if (radv_image_has_fmask(image)) {
886 uint64_t gpu_address = radv_buffer_get_va(image->bo);
887 uint32_t format;
888 uint64_t va;
889
/* FMASK is only allocated for single-plane images. */
890 assert(image->plane_count == 1);
891
892 va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;
893
/* The FMASK hw format is a function of the sample/fragment count. */
894 switch (image->info.samples) {
895 case 2:
896 format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2;
897 break;
898 case 4:
899 format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4;
900 break;
901 case 8:
902 format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8;
903 break;
904 default:
905 unreachable("invalid nr_samples");
906 }
907
908 fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
909 fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) |
910 S_00A004_FORMAT(format) |
911 S_00A004_WIDTH_LO(width - 1);
912 fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
913 S_00A008_HEIGHT(height - 1) |
914 S_00A008_RESOURCE_LEVEL(1);
915 fmask_state[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
916 S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
917 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
918 S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
919 S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) |
920 S_00A00C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
921 fmask_state[4] = S_00A010_DEPTH(last_layer) |
922 S_00A010_BASE_ARRAY(first_layer);
923 fmask_state[5] = 0;
924 fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
925 fmask_state[7] = 0;
926 } else
/* No FMASK: hand back a null descriptor. */
927 memset(fmask_state, 0, 8 * 4);
928 }
929 }
930
931 /**
932 * Build the sampler view descriptor for a texture (SI-GFX9)
933 */
/* Fills the 8-dword descriptor in state[0..7] and, if fmask_state is
 * non-NULL, the matching FMASK descriptor.  Base addresses are left for
 * si_set_mutable_tex_desc_fields() to fill in.
 */
934 static void
si_make_texture_descriptor(struct radv_device * device,struct radv_image * image,bool is_storage_image,VkImageViewType view_type,VkFormat vk_format,const VkComponentMapping * mapping,unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer,unsigned width,unsigned height,unsigned depth,uint32_t * state,uint32_t * fmask_state)935 si_make_texture_descriptor(struct radv_device *device,
936 struct radv_image *image,
937 bool is_storage_image,
938 VkImageViewType view_type,
939 VkFormat vk_format,
940 const VkComponentMapping *mapping,
941 unsigned first_level, unsigned last_level,
942 unsigned first_layer, unsigned last_layer,
943 unsigned width, unsigned height, unsigned depth,
944 uint32_t *state,
945 uint32_t *fmask_state)
946 {
947 const struct vk_format_description *desc;
948 enum vk_swizzle swizzle[4];
949 int first_non_void;
950 unsigned num_format, data_format, type;
951
952 desc = vk_format_description(vk_format);
953
954 radv_compose_swizzle(desc, mapping, swizzle);
955
956 first_non_void = vk_format_get_first_non_void_channel(vk_format);
957
/* ~0 means the format has no hw translation; fall back to 0 so the
 * descriptor stays well-formed. */
958 num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
959 if (num_format == ~0) {
960 num_format = 0;
961 }
962
963 data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
964 if (data_format == ~0) {
965 data_format = 0;
966 }
967
968 /* S8 with either Z16 or Z32 HTILE need a special format. */
969 if (device->physical_device->rad_info.chip_class == GFX9 &&
970 vk_format == VK_FORMAT_S8_UINT &&
971 radv_image_is_tc_compat_htile(image)) {
972 if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
973 data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
974 else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
975 data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
976 }
/* Hardware resource type and the matching dimension fix-ups (same logic
 * as the GFX10 path above). */
977 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
978 is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
979 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
980 height = 1;
981 depth = image->info.array_size;
982 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
983 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
984 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
985 depth = image->info.array_size;
986 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
987 depth = image->info.array_size / 6;
988
/* state[0] (base address) is patched in later. */
989 state[0] = 0;
990 state[1] = (S_008F14_DATA_FORMAT(data_format) |
991 S_008F14_NUM_FORMAT(num_format));
992 state[2] = (S_008F18_WIDTH(width - 1) |
993 S_008F18_HEIGHT(height - 1) |
994 S_008F18_PERF_MOD(4));
/* MSAA resources encode 0/log2(samples) in BASE_LEVEL/LAST_LEVEL. */
995 state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
996 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
997 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
998 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
999 S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
1000 0 : first_level) |
1001 S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
1002 util_logbase2(image->info.samples) :
1003 last_level) |
1004 S_008F1C_TYPE(type));
1005 state[4] = 0;
1006 state[5] = S_008F24_BASE_ARRAY(first_layer);
1007 state[6] = 0;
1008 state[7] = 0;
1009
1010 if (device->physical_device->rad_info.chip_class == GFX9) {
1011 unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);
1012
1013 /* Depth is the last accessible layer on Gfx9.
1014 * The hw doesn't need to know the total number of layers.
1015 */
1016 if (type == V_008F1C_SQ_RSRC_IMG_3D)
1017 state[4] |= S_008F20_DEPTH(depth - 1);
1018 else
1019 state[4] |= S_008F20_DEPTH(last_layer);
1020
1021 state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
1022 state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
1023 util_logbase2(image->info.samples) :
1024 image->info.levels - 1);
1025 } else {
/* Pre-GFX9 uses LAST_ARRAY instead of a MAX_MIP field. */
1026 state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
1027 state[4] |= S_008F20_DEPTH(depth - 1);
1028 state[5] |= S_008F24_LAST_ARRAY(last_layer);
1029 }
/* With DCC, record where alpha lives so clears are interpreted right. */
1030 if (image->planes[0].surface.dcc_offset) {
1031 state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
1032 } else {
1033 /* The last dword is unused by hw. The shader uses it to clear
1034 * bits in the first dword of sampler state.
1035 */
1036 if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
1037 if (first_level == last_level)
1038 state[7] = C_008F30_MAX_ANISO_RATIO;
1039 else
1040 state[7] = 0xffffffff;
1041 }
1042 }
1043
1044 /* Initialize the sampler view for FMASK. */
1045 if (fmask_state) {
1046 if (radv_image_has_fmask(image)) {
1047 uint32_t fmask_format, num_format;
1048 uint64_t gpu_address = radv_buffer_get_va(image->bo);
1049 uint64_t va;
1050
/* FMASK is only allocated for single-plane images. */
1051 assert(image->plane_count == 1);
1052
1053 va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;
1054
/* GFX9 has a dedicated FMASK data format with per-sample-count
 * num formats; older chips encode samples in the data format. */
1055 if (device->physical_device->rad_info.chip_class == GFX9) {
1056 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
1057 switch (image->info.samples) {
1058 case 2:
1059 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2;
1060 break;
1061 case 4:
1062 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4;
1063 break;
1064 case 8:
1065 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8;
1066 break;
1067 default:
1068 unreachable("invalid nr_samples");
1069 }
1070 } else {
1071 switch (image->info.samples) {
1072 case 2:
1073 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
1074 break;
1075 case 4:
1076 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
1077 break;
1078 case 8:
1079 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
1080 break;
1081 default:
1082 assert(0);
1083 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
1084 }
1085 num_format = V_008F14_IMG_NUM_FORMAT_UINT;
1086 }
1087
1088 fmask_state[0] = va >> 8;
1089 fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
1090 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
1091 S_008F14_DATA_FORMAT(fmask_format) |
1092 S_008F14_NUM_FORMAT(num_format);
1093 fmask_state[2] = S_008F18_WIDTH(width - 1) |
1094 S_008F18_HEIGHT(height - 1);
1095 fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
1096 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
1097 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
1098 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
1099 S_008F1C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
1100 fmask_state[4] = 0;
1101 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
1102 fmask_state[6] = 0;
1103 fmask_state[7] = 0;
1104
/* Generation-specific tiling fields, plus the CMASK metadata address
 * when TC-compatible CMASK lets the texture unit read compressed. */
1105 if (device->physical_device->rad_info.chip_class == GFX9) {
1106 fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
1107 fmask_state[4] |= S_008F20_DEPTH(last_layer) |
1108 S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
1109 fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) |
1110 S_008F24_META_RB_ALIGNED(1);
1111
1112 if (radv_image_is_tc_compat_cmask(image)) {
1113 va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
1114
1115 fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
1116 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1117 fmask_state[7] |= va >> 8;
1118 }
1119 } else {
1120 fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index);
1121 fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
1122 S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1);
1123 fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
1124
1125 if (radv_image_is_tc_compat_cmask(image)) {
1126 va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
1127
1128 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1129 fmask_state[7] |= va >> 8;
1130 }
1131 }
1132 } else
/* No FMASK: hand back a null descriptor. */
1133 memset(fmask_state, 0, 8 * 4);
1134 }
1135 }
1136
1137 static void
radv_make_texture_descriptor(struct radv_device * device,struct radv_image * image,bool is_storage_image,VkImageViewType view_type,VkFormat vk_format,const VkComponentMapping * mapping,unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer,unsigned width,unsigned height,unsigned depth,uint32_t * state,uint32_t * fmask_state)1138 radv_make_texture_descriptor(struct radv_device *device,
1139 struct radv_image *image,
1140 bool is_storage_image,
1141 VkImageViewType view_type,
1142 VkFormat vk_format,
1143 const VkComponentMapping *mapping,
1144 unsigned first_level, unsigned last_level,
1145 unsigned first_layer, unsigned last_layer,
1146 unsigned width, unsigned height, unsigned depth,
1147 uint32_t *state,
1148 uint32_t *fmask_state)
1149 {
1150 if (device->physical_device->rad_info.chip_class >= GFX10) {
1151 gfx10_make_texture_descriptor(device, image, is_storage_image,
1152 view_type, vk_format, mapping,
1153 first_level, last_level,
1154 first_layer, last_layer,
1155 width, height, depth,
1156 state, fmask_state);
1157 } else {
1158 si_make_texture_descriptor(device, image, is_storage_image,
1159 view_type, vk_format, mapping,
1160 first_level, last_level,
1161 first_layer, last_layer,
1162 width, height, depth,
1163 state, fmask_state);
1164 }
1165 }
1166
/* Serialize an opaque metadata blob describing the image (a whole-resource
 * sampler descriptor plus per-level offsets) into md->metadata, so other
 * processes/drivers importing the BO can reconstruct the layout.
 */
1167 static void
radv_query_opaque_metadata(struct radv_device * device,struct radv_image * image,struct radeon_bo_metadata * md)1168 radv_query_opaque_metadata(struct radv_device *device,
1169 struct radv_image *image,
1170 struct radeon_bo_metadata *md)
1171 {
1172 static const VkComponentMapping fixedmapping;
1173 uint32_t desc[8], i;
1174
/* Only single-plane images are exported with this metadata format. */
1175 assert(image->plane_count == 1);
1176
1177 /* Metadata image format format version 1:
1178 * [0] = 1 (metadata format identifier)
1179 * [1] = (VENDOR_ID << 16) | PCI_ID
1180 * [2:9] = image descriptor for the whole resource
1181 * [2] is always 0, because the base address is cleared
1182 * [9] is the DCC offset bits [39:8] from the beginning of
1183 * the buffer
1184 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
1185 */
1186 md->metadata[0] = 1; /* metadata image format version 1 */
1187
1188 /* TILE_MODE_INDEX is ambiguous without a PCI ID. */
1189 md->metadata[1] = si_get_bo_metadata_word1(device);
1190
1191
/* Descriptor covering all mips/layers with an identity swizzle. */
1192 radv_make_texture_descriptor(device, image, false,
1193 (VkImageViewType)image->type, image->vk_format,
1194 &fixedmapping, 0, image->info.levels - 1, 0,
1195 image->info.array_size - 1,
1196 image->info.width, image->info.height,
1197 image->info.depth,
1198 desc, NULL);
1199
/* NOTE(review): the legacy level[0] pointer is passed unconditionally;
 * presumably it is ignored on GFX9+ where the union holds gfx9 data —
 * confirm against si_set_mutable_tex_desc_fields. */
1200 si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0,
1201 image->planes[0].surface.blk_w, false, false, false, desc);
1202
1203 /* Clear the base address and set the relative DCC offset. */
1204 desc[0] = 0;
1205 desc[1] &= C_008F14_BASE_ADDRESS_HI;
1206 desc[7] = image->planes[0].surface.dcc_offset >> 8;
1207
1208 /* Dwords [2:9] contain the image descriptor. */
1209 memcpy(&md->metadata[2], desc, sizeof(desc));
1210
1211 /* Dwords [10:..] contain the mipmap level offsets. */
1212 if (device->physical_device->rad_info.chip_class <= GFX8) {
1213 for (i = 0; i <= image->info.levels - 1; i++)
1214 md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8;
1215 md->size_metadata = (11 + image->info.levels - 1) * 4;
1216 } else
1217 md->size_metadata = 10 * 4;
1218 }
1219
/* Fill radeon_bo_metadata for exporting plane 0's BO: generation-specific
 * tiling parameters plus the opaque descriptor blob from
 * radv_query_opaque_metadata().
 */
1220 void
radv_init_metadata(struct radv_device * device,struct radv_image * image,struct radeon_bo_metadata * metadata)1221 radv_init_metadata(struct radv_device *device,
1222 struct radv_image *image,
1223 struct radeon_bo_metadata *metadata)
1224 {
1225 struct radeon_surf *surface = &image->planes[0].surface;
1226
1227 memset(metadata, 0, sizeof(*metadata));
1228
/* GFX9+ only needs the swizzle mode; older chips export the full set of
 * legacy tiling parameters. */
1229 if (device->physical_device->rad_info.chip_class >= GFX9) {
1230 metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
1231 metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1232 } else {
1233 metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
1234 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
1235 metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
1236 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
1237 metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
1238 metadata->u.legacy.bankw = surface->u.legacy.bankw;
1239 metadata->u.legacy.bankh = surface->u.legacy.bankh;
1240 metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
1241 metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
1242 metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
1243 metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
1244 metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1245 }
1246 radv_query_opaque_metadata(device, image, metadata);
1247 }
1248
1249 void
radv_image_override_offset_stride(struct radv_device * device,struct radv_image * image,uint64_t offset,uint32_t stride)1250 radv_image_override_offset_stride(struct radv_device *device,
1251 struct radv_image *image,
1252 uint64_t offset, uint32_t stride)
1253 {
1254 ac_surface_override_offset_stride(&device->physical_device->rad_info,
1255 &image->planes[0].surface,
1256 image->info.levels, offset, stride);
1257 }
1258
1259 static void
radv_image_alloc_single_sample_cmask(const struct radv_device * device,const struct radv_image * image,struct radeon_surf * surf)1260 radv_image_alloc_single_sample_cmask(const struct radv_device *device,
1261 const struct radv_image *image,
1262 struct radeon_surf *surf)
1263 {
1264 if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 ||
1265 image->info.levels > 1 || image->info.depth > 1 ||
1266 radv_image_has_dcc(image) ||
1267 !radv_image_use_fast_clear_for_image(device, image))
1268 return;
1269
1270 assert(image->info.storage_samples == 1);
1271
1272 surf->cmask_offset = align64(surf->total_size, surf->cmask_alignment);
1273 surf->total_size = surf->cmask_offset + surf->cmask_size;
1274 surf->alignment = MAX2(surf->alignment, surf->cmask_alignment);
1275 }
1276
1277 static void
radv_image_alloc_values(const struct radv_device * device,struct radv_image * image)1278 radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
1279 {
1280 if (radv_image_has_dcc(image)) {
1281 image->fce_pred_offset = image->size;
1282 image->size += 8 * image->info.levels;
1283
1284 image->dcc_pred_offset = image->size;
1285 image->size += 8 * image->info.levels;
1286 }
1287
1288 if (radv_image_has_dcc(image) || radv_image_has_cmask(image) ||
1289 radv_image_has_htile(image)) {
1290 image->clear_value_offset = image->size;
1291 image->size += 8 * image->info.levels;
1292 }
1293
1294 if (radv_image_is_tc_compat_htile(image) &&
1295 device->physical_device->rad_info.has_tc_compat_zrange_bug) {
1296 /* Metadata for the TC-compatible HTILE hardware bug which
1297 * have to be fixed by updating ZRANGE_PRECISION when doing
1298 * fast depth clears to 0.0f.
1299 */
1300 image->tc_compat_zrange_offset = image->size;
1301 image->size += image->info.levels * 4;
1302 }
1303 }
1304
1305
1306 static void
radv_image_reset_layout(struct radv_image * image)1307 radv_image_reset_layout(struct radv_image *image)
1308 {
1309 image->size = 0;
1310 image->alignment = 1;
1311
1312 image->tc_compatible_cmask = image->tc_compatible_htile = 0;
1313 image->fce_pred_offset = image->dcc_pred_offset = 0;
1314 image->clear_value_offset = image->tc_compat_zrange_offset = 0;
1315
1316 for (unsigned i = 0; i < image->plane_count; ++i) {
1317 VkFormat format = vk_format_get_plane_format(image->vk_format, i);
1318
1319 uint32_t flags = image->planes[i].surface.flags;
1320 memset(image->planes + i, 0, sizeof(image->planes[i]));
1321
1322 image->planes[i].surface.flags = flags;
1323 image->planes[i].surface.blk_w = vk_format_get_blockwidth(format);
1324 image->planes[i].surface.blk_h = vk_format_get_blockheight(format);
1325 image->planes[i].surface.bpe = vk_format_get_blocksize(vk_format_depth_only(format));
1326
1327 /* align byte per element on dword */
1328 if (image->planes[i].surface.bpe == 3) {
1329 image->planes[i].surface.bpe = 4;
1330 }
1331 }
1332 }
1333
/* Compute the full memory layout of an image: per-plane surfaces, plane
 * offsets, total size/alignment, and trailing metadata values.  Safe to
 * call late (after creation) for Android external formats.
 */
1334 VkResult
radv_image_create_layout(struct radv_device * device,struct radv_image_create_info create_info,struct radv_image * image)1335 radv_image_create_layout(struct radv_device *device,
1336 struct radv_image_create_info create_info,
1337 struct radv_image *image)
1338 {
1339 /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
1340 * common internal case. */
1341 create_info.vk_info = NULL;
1342
1343 struct ac_surf_info image_info = image->info;
1344 VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
1345 if (result != VK_SUCCESS)
1346 return result;
1347
/* Start from a clean slate so this function is idempotent. */
1348 radv_image_reset_layout(image);
1349
1350 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
1351 struct ac_surf_info info = image_info;
1352
/* Chroma planes of subsampled formats are smaller than plane 0. */
1353 if (plane) {
1354 const struct vk_format_description *desc = vk_format_description(image->vk_format);
1355 assert(info.width % desc->width_divisor == 0);
1356 assert(info.height % desc->height_divisor == 0);
1357
1358 info.width /= desc->width_divisor;
1359 info.height /= desc->height_divisor;
1360 }
1361
/* Compression metadata is not supported on multi-plane images or when
 * the caller asked for metadata-free planes. */
1362 if (create_info.no_metadata_planes || image->plane_count > 1) {
1363 image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC |
1364 RADEON_SURF_NO_FMASK |
1365 RADEON_SURF_NO_HTILE;
1366 }
1367
1368 device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);
1369
1370 if (!create_info.no_metadata_planes && image->plane_count == 1)
1371 radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface);
1372
/* Place each plane after the previous one, honoring its alignment. */
1373 image->planes[plane].offset = align(image->size, image->planes[plane].surface.alignment);
1374 image->size = image->planes[plane].offset + image->planes[plane].surface.total_size;
1375 image->alignment = MAX2(image->alignment, image->planes[plane].surface.alignment);
1376
1377 image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
1378 }
1379
1380 image->tc_compatible_cmask = radv_image_has_cmask(image) &&
1381 radv_use_tc_compat_cmask_for_image(device, image);
1382
1383 image->tc_compatible_htile = radv_image_has_htile(image) &&
1384 image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
1385
/* Append the metadata value slots (predicates, clear values, ...). */
1386 radv_image_alloc_values(device, image);
1387
1388 assert(image->planes[0].surface.surf_size);
1389 return VK_SUCCESS;
1390 }
1391
1392 static void
radv_destroy_image(struct radv_device * device,const VkAllocationCallbacks * pAllocator,struct radv_image * image)1393 radv_destroy_image(struct radv_device *device,
1394 const VkAllocationCallbacks *pAllocator,
1395 struct radv_image *image)
1396 {
1397 if ((image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bo)
1398 device->ws->buffer_destroy(image->bo);
1399
1400 if (image->owned_memory != VK_NULL_HANDLE) {
1401 RADV_FROM_HANDLE(radv_device_memory, mem, image->owned_memory);
1402 radv_free_memory(device, pAllocator, mem);
1403 }
1404
1405 vk_object_base_finish(&image->base);
1406 vk_free2(&device->vk.alloc, pAllocator, image);
1407 }
1408
/* Create a radv_image from a VkImageCreateInfo (plus driver extras in
 * create_info): allocates the object, records creation parameters, then
 * computes the layout — except for Android external formats, whose layout
 * is deferred until the format is known at bind time.
 */
1409 VkResult
radv_image_create(VkDevice _device,const struct radv_image_create_info * create_info,const VkAllocationCallbacks * alloc,VkImage * pImage)1410 radv_image_create(VkDevice _device,
1411 const struct radv_image_create_info *create_info,
1412 const VkAllocationCallbacks* alloc,
1413 VkImage *pImage)
1414 {
1415 RADV_FROM_HANDLE(radv_device, device, _device);
1416 const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
1417 struct radv_image *image = NULL;
/* Android external formats replace VK_FORMAT_UNDEFINED here. */
1418 VkFormat format = radv_select_android_external_format(pCreateInfo->pNext,
1419 pCreateInfo->format);
1420 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
1421
/* The plane array is allocated inline after the image struct. */
1422 const unsigned plane_count = vk_format_get_plane_count(format);
1423 const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;
1424
1425 radv_assert(pCreateInfo->mipLevels > 0);
1426 radv_assert(pCreateInfo->arrayLayers > 0);
1427 radv_assert(pCreateInfo->samples > 0);
1428 radv_assert(pCreateInfo->extent.width > 0);
1429 radv_assert(pCreateInfo->extent.height > 0);
1430 radv_assert(pCreateInfo->extent.depth > 0);
1431
1432 image = vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8,
1433 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1434 if (!image)
1435 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1436
1437 vk_object_base_init(&device->vk, &image->base, VK_OBJECT_TYPE_IMAGE);
1438
/* Record the creation parameters the rest of the driver keys off. */
1439 image->type = pCreateInfo->imageType;
1440 image->info.width = pCreateInfo->extent.width;
1441 image->info.height = pCreateInfo->extent.height;
1442 image->info.depth = pCreateInfo->extent.depth;
1443 image->info.samples = pCreateInfo->samples;
1444 image->info.storage_samples = pCreateInfo->samples;
1445 image->info.array_size = pCreateInfo->arrayLayers;
1446 image->info.levels = pCreateInfo->mipLevels;
1447 image->info.num_channels = vk_format_get_nr_components(format);
1448
1449 image->vk_format = format;
1450 image->tiling = pCreateInfo->tiling;
1451 image->usage = pCreateInfo->usage;
1452 image->flags = pCreateInfo->flags;
1453 image->plane_count = plane_count;
1454
/* Concurrent sharing with external/foreign queues enables all families. */
1455 image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
1456 if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
1457 for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
1458 if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
1459 pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
1460 image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1461 else
1462 image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
1463 }
1464
1465 const VkExternalMemoryImageCreateInfo *external_info =
1466 vk_find_struct_const(pCreateInfo->pNext,
1467 EXTERNAL_MEMORY_IMAGE_CREATE_INFO) ;
1468
/* Non-shareable color images take part in the MRT offset rotation. */
1469 image->shareable = external_info;
1470 if (!vk_format_is_depth_or_stencil(format) && !image->shareable) {
1471 image->info.surf_index = &device->image_mrt_offset_counter;
1472 }
1473
1474 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
1475 image->planes[plane].surface.flags =
1476 radv_get_surface_flags(device, image, plane, pCreateInfo, format);
1477 }
1478
/* AHardwareBuffer imports don't know the layout yet; defer it. */
1479 bool delay_layout = external_info &&
1480 (external_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);
1481
1482 if (delay_layout) {
1483 *pImage = radv_image_to_handle(image);
1484 assert (!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
1485 return VK_SUCCESS;
1486 }
1487
1488 ASSERTED VkResult result = radv_image_create_layout(device, *create_info, image);
1489 assert(result == VK_SUCCESS);
1490
/* Sparse images get a virtual BO up front; pages are bound later. */
1491 if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
1492 image->alignment = MAX2(image->alignment, 4096);
1493 image->size = align64(image->size, image->alignment);
1494 image->offset = 0;
1495
1496 image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
1497 0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
1498 if (!image->bo) {
1499 radv_destroy_image(device, alloc, image);
1500 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1501 }
1502 }
1503
1504 *pImage = radv_image_to_handle(image);
1505
1506 return VK_SUCCESS;
1507 }
1508
/* Fill one plane's descriptor (sampled or storage flavor) of an image view,
 * then patch in the mutable/tiling fields for the view's base mip.
 */
1509 static void
radv_image_view_make_descriptor(struct radv_image_view * iview,struct radv_device * device,VkFormat vk_format,const VkComponentMapping * components,bool is_storage_image,bool disable_compression,unsigned plane_id,unsigned descriptor_plane_id)1510 radv_image_view_make_descriptor(struct radv_image_view *iview,
1511 struct radv_device *device,
1512 VkFormat vk_format,
1513 const VkComponentMapping *components,
1514 bool is_storage_image, bool disable_compression,
1515 unsigned plane_id, unsigned descriptor_plane_id)
1516 {
1517 struct radv_image *image = iview->image;
1518 struct radv_image_plane *plane = &image->planes[plane_id];
1519 const struct vk_format_description *format_desc = vk_format_description(image->vk_format);
1520 bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
1521 uint32_t blk_w;
1522 union radv_descriptor *descriptor;
1523 uint32_t hw_level = 0;
1524
/* Storage and sampled views are kept in separate descriptors. */
1525 if (is_storage_image) {
1526 descriptor = &iview->storage_descriptor;
1527 } else {
1528 descriptor = &iview->descriptor;
1529 }
1530
/* Scale the surface block width into view-format block units. */
1531 assert(vk_format_get_plane_count(vk_format) == 1);
1532 assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
1533 blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);
1534
/* GFX9+ descriptors address mips relative to the view's base mip;
 * older chips bake the base mip into the address instead (hw_level 0). */
1535 if (device->physical_device->rad_info.chip_class >= GFX9)
1536 hw_level = iview->base_mip;
1537 radv_make_texture_descriptor(device, image, is_storage_image,
1538 iview->type,
1539 vk_format,
1540 components,
1541 hw_level, hw_level + iview->level_count - 1,
1542 iview->base_layer,
1543 iview->base_layer + iview->layer_count - 1,
1544 iview->extent.width / (plane_id ? format_desc->width_divisor : 1),
1545 iview->extent.height / (plane_id ? format_desc->height_divisor : 1),
1546 iview->extent.depth,
1547 descriptor->plane_descriptors[descriptor_plane_id],
1548 descriptor_plane_id ? NULL : descriptor->fmask_descriptor);
1549
/* Legacy chips need the per-level info to compute the base address. */
1550 const struct legacy_surf_level *base_level_info = NULL;
1551 if (device->physical_device->rad_info.chip_class <= GFX9) {
1552 if (is_stencil)
1553 base_level_info = &plane->surface.u.legacy.stencil_level[iview->base_mip];
1554 else
1555 base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
1556 }
1557 si_set_mutable_tex_desc_fields(device, image,
1558 base_level_info,
1559 plane_id,
1560 iview->base_mip,
1561 iview->base_mip,
1562 blk_w, is_stencil, is_storage_image,
1563 is_storage_image || disable_compression,
1564 descriptor->plane_descriptors[descriptor_plane_id]);
1565 }
1566
1567 static unsigned
radv_plane_from_aspect(VkImageAspectFlags mask)1568 radv_plane_from_aspect(VkImageAspectFlags mask)
1569 {
1570 switch(mask) {
1571 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1572 return 1;
1573 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1574 return 2;
1575 default:
1576 return 0;
1577 }
1578 }
1579
1580 VkFormat
radv_get_aspect_format(struct radv_image * image,VkImageAspectFlags mask)1581 radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
1582 {
1583 switch(mask) {
1584 case VK_IMAGE_ASPECT_PLANE_0_BIT:
1585 return image->planes[0].format;
1586 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1587 return image->planes[1].format;
1588 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1589 return image->planes[2].format;
1590 case VK_IMAGE_ASPECT_STENCIL_BIT:
1591 return vk_format_stencil_only(image->vk_format);
1592 case VK_IMAGE_ASPECT_DEPTH_BIT:
1593 return vk_format_depth_only(image->vk_format);
1594 case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
1595 return vk_format_depth_only(image->vk_format);
1596 default:
1597 return image->vk_format;
1598 }
1599 }
1600
1601 void
radv_image_view_init(struct radv_image_view * iview,struct radv_device * device,const VkImageViewCreateInfo * pCreateInfo,const struct radv_image_view_extra_create_info * extra_create_info)1602 radv_image_view_init(struct radv_image_view *iview,
1603 struct radv_device *device,
1604 const VkImageViewCreateInfo* pCreateInfo,
1605 const struct radv_image_view_extra_create_info* extra_create_info)
1606 {
1607 RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
1608 const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
1609
1610 switch (image->type) {
1611 case VK_IMAGE_TYPE_1D:
1612 case VK_IMAGE_TYPE_2D:
1613 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
1614 break;
1615 case VK_IMAGE_TYPE_3D:
1616 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
1617 <= radv_minify(image->info.depth, range->baseMipLevel));
1618 break;
1619 default:
1620 unreachable("bad VkImageType");
1621 }
1622 iview->image = image;
1623 iview->bo = image->bo;
1624 iview->type = pCreateInfo->viewType;
1625 iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
1626 iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
1627 iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 && iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT;
1628
1629 iview->base_layer = range->baseArrayLayer;
1630 iview->layer_count = radv_get_layerCount(image, range);
1631 iview->base_mip = range->baseMipLevel;
1632 iview->level_count = radv_get_levelCount(image, range);
1633
1634 iview->vk_format = pCreateInfo->format;
1635
1636 /* If the image has an Android external format, pCreateInfo->format will be
1637 * VK_FORMAT_UNDEFINED. */
1638 if (iview->vk_format == VK_FORMAT_UNDEFINED)
1639 iview->vk_format = image->vk_format;
1640
1641 if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
1642 iview->vk_format = vk_format_stencil_only(iview->vk_format);
1643 } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
1644 iview->vk_format = vk_format_depth_only(iview->vk_format);
1645 }
1646
1647 if (device->physical_device->rad_info.chip_class >= GFX9) {
1648 iview->extent = (VkExtent3D) {
1649 .width = image->info.width,
1650 .height = image->info.height,
1651 .depth = image->info.depth,
1652 };
1653 } else {
1654 iview->extent = (VkExtent3D) {
1655 .width = radv_minify(image->info.width , range->baseMipLevel),
1656 .height = radv_minify(image->info.height, range->baseMipLevel),
1657 .depth = radv_minify(image->info.depth , range->baseMipLevel),
1658 };
1659 }
1660
1661 if (iview->vk_format != image->planes[iview->plane_id].format) {
1662 unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
1663 unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
1664 unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
1665 unsigned img_bh = vk_format_get_blockheight(image->vk_format);
1666
1667 iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
1668 iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);
1669
1670 /* Comment ported from amdvlk -
1671 * If we have the following image:
1672 * Uncompressed pixels Compressed block sizes (4x4)
1673 * mip0: 22 x 22 6 x 6
1674 * mip1: 11 x 11 3 x 3
1675 * mip2: 5 x 5 2 x 2
1676 * mip3: 2 x 2 1 x 1
1677 * mip4: 1 x 1 1 x 1
1678 *
1679 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
1680 * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
1681 * divide-by-two integer math):
1682 * mip0: 6x6
1683 * mip1: 3x3
1684 * mip2: 1x1
1685 * mip3: 1x1
1686 *
1687 * This means that mip2 will be missing texels.
1688 *
1689 * Fix this by calculating the base mip's width and height, then convert
1690 * that, and round it back up to get the level 0 size. Clamp the
1691 * converted size between the original values, and the physical extent
1692 * of the base mipmap.
1693 *
1694 * On GFX10 we have to take care to not go over the physical extent
1695 * of the base mipmap as otherwise the GPU computes a different layout.
1696 * Note that the GPU does use the same base-mip dimensions for both a
1697 * block compatible format and the compressed format, so even if we take
1698 * the plain converted dimensions the physical layout is correct.
1699 */
1700 if (device->physical_device->rad_info.chip_class >= GFX9 &&
1701 vk_format_is_compressed(image->vk_format) &&
1702 !vk_format_is_compressed(iview->vk_format)) {
1703 /* If we have multiple levels in the view we should ideally take the last level,
1704 * but the mip calculation has a max(..., 1) so walking back to the base mip in an
1705 * useful way is hard. */
1706 if (iview->level_count > 1) {
1707 iview->extent.width = iview->image->planes[0].surface.u.gfx9.base_mip_width;
1708 iview->extent.height = iview->image->planes[0].surface.u.gfx9.base_mip_height;
1709 } else {
1710 unsigned lvl_width = radv_minify(image->info.width , range->baseMipLevel);
1711 unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);
1712
1713 lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
1714 lvl_height = round_up_u32(lvl_height * view_bh, img_bh);
1715
1716 lvl_width <<= range->baseMipLevel;
1717 lvl_height <<= range->baseMipLevel;
1718
1719 iview->extent.width = CLAMP(lvl_width, iview->extent.width,
1720 iview->image->planes[0].surface.u.gfx9.base_mip_width);
1721 iview->extent.height = CLAMP(lvl_height, iview->extent.height,
1722 iview->image->planes[0].surface.u.gfx9.base_mip_height);
1723 }
1724 }
1725 }
1726
1727 bool disable_compression = extra_create_info ? extra_create_info->disable_compression: false;
1728 for (unsigned i = 0; i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
1729 VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
1730 radv_image_view_make_descriptor(iview, device, format,
1731 &pCreateInfo->components,
1732 false, disable_compression,
1733 iview->plane_id + i, i);
1734 radv_image_view_make_descriptor(iview, device,
1735 format, &pCreateInfo->components,
1736 true, disable_compression,
1737 iview->plane_id + i, i);
1738 }
1739 }
1740
/* Return whether HTILE metadata stays compressed for the given image
 * layout, queue mask and render-loop state.
 */
bool radv_layout_is_htile_compressed(const struct radv_device *device,
				     const struct radv_image *image,
				     VkImageLayout layout,
				     bool in_render_loop,
				     unsigned queue_mask)
{
	if (radv_image_is_tc_compat_htile(image)) {
		if (layout != VK_IMAGE_LAYOUT_GENERAL)
			return true;

		/* It should be safe to keep TC-compat HTILE enabled in
		 * VK_IMAGE_LAYOUT_GENERAL when we are not in a render loop
		 * and the image doesn't have the storage bit set. This
		 * improves performance for apps that use GENERAL for the
		 * main depth pass because compression stays on, which
		 * reduces the number of decompressions from/to GENERAL.
		 */
		return !in_render_loop &&
		       !device->instance->disable_tc_compat_htile_in_general &&
		       !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT);
	}

	if (!radv_image_has_htile(image))
		return false;

	switch (layout) {
	case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
	case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR:
	case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR:
		return true;
	case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
		/* Only when the image is exclusively owned by the general
		 * queue can a transfer destination stay compressed.
		 */
		return queue_mask == (1u << RADV_QUEUE_GENERAL);
	default:
		return false;
	}
}
1773
/* Return whether a fast color clear is allowed in the given layout/queue
 * combination. The image and render-loop parameters are part of the
 * interface shared with the other layout helpers.
 */
bool radv_layout_can_fast_clear(const struct radv_image *image,
				VkImageLayout layout,
				bool in_render_loop,
				unsigned queue_mask)
{
	if (layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
		return false;

	/* Fast clears require exclusive ownership by the general queue. */
	return queue_mask == (1u << RADV_QUEUE_GENERAL);
}
1782
/* Return whether DCC stays compressed for the given layout/queue mask. */
bool radv_layout_dcc_compressed(const struct radv_device *device,
				const struct radv_image *image,
				VkImageLayout layout,
				bool in_render_loop,
				unsigned queue_mask)
{
	if (!radv_image_has_dcc(image))
		return false;

	/* Don't compress a compute transfer destination: image stores to
	 * DCC surfaces are not supported.
	 */
	const bool compute_transfer_dst =
		layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
		(queue_mask & (1u << RADV_QUEUE_COMPUTE));
	if (compute_transfer_dst)
		return false;

	return layout != VK_IMAGE_LAYOUT_GENERAL;
}
1796
1797
radv_image_queue_family_mask(const struct radv_image * image,uint32_t family,uint32_t queue_family)1798 unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
1799 {
1800 if (!image->exclusive)
1801 return image->queue_family_mask;
1802 if (family == VK_QUEUE_FAMILY_EXTERNAL ||
1803 family == VK_QUEUE_FAMILY_FOREIGN_EXT)
1804 return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1805 if (family == VK_QUEUE_FAMILY_IGNORED)
1806 return 1u << queue_family;
1807 return 1u << family;
1808 }
1809
1810 VkResult
radv_CreateImage(VkDevice device,const VkImageCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkImage * pImage)1811 radv_CreateImage(VkDevice device,
1812 const VkImageCreateInfo *pCreateInfo,
1813 const VkAllocationCallbacks *pAllocator,
1814 VkImage *pImage)
1815 {
1816 #ifdef ANDROID
1817 const VkNativeBufferANDROID *gralloc_info =
1818 vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
1819
1820 if (gralloc_info)
1821 return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
1822 pAllocator, pImage);
1823 #endif
1824
1825 const struct wsi_image_create_info *wsi_info =
1826 vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
1827 bool scanout = wsi_info && wsi_info->scanout;
1828
1829 return radv_image_create(device,
1830 &(struct radv_image_create_info) {
1831 .vk_info = pCreateInfo,
1832 .scanout = scanout,
1833 },
1834 pAllocator,
1835 pImage);
1836 }
1837
1838 void
radv_DestroyImage(VkDevice _device,VkImage _image,const VkAllocationCallbacks * pAllocator)1839 radv_DestroyImage(VkDevice _device, VkImage _image,
1840 const VkAllocationCallbacks *pAllocator)
1841 {
1842 RADV_FROM_HANDLE(radv_device, device, _device);
1843 RADV_FROM_HANDLE(radv_image, image, _image);
1844
1845 if (!image)
1846 return;
1847
1848 radv_destroy_image(device, pAllocator, image);
1849 }
1850
/* vkGetImageSubresourceLayout entry point: report offset, pitches and size
 * of one mip level / array layer of the requested plane.
 */
void radv_GetImageSubresourceLayout(
	VkDevice                                    _device,
	VkImage                                     _image,
	const VkImageSubresource*                   pSubresource,
	VkSubresourceLayout*                        pLayout)
{
	RADV_FROM_HANDLE(radv_image, image, _image);
	RADV_FROM_HANDLE(radv_device, device, _device);
	const int level = pSubresource->mipLevel;
	const int layer = pSubresource->arrayLayer;

	const unsigned plane_id = radv_plane_from_aspect(pSubresource->aspectMask);
	struct radv_image_plane *plane = &image->planes[plane_id];
	struct radeon_surf *surface = &plane->surface;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* Only linear GFX9+ surfaces have per-level offsets; tiled
		 * surfaces address all levels from the base. */
		const uint64_t level_offset =
			surface->is_linear ? surface->u.gfx9.offset[level] : 0;

		pLayout->offset = plane->offset + level_offset +
				  surface->u.gfx9.surf_slice_size * layer;

		if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
		    image->vk_format == VK_FORMAT_R32G32B32_SINT ||
		    image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
			/* Adjust the number of bytes between each row because
			 * the pitch is actually the number of components per
			 * row for these 96-bit formats.
			 */
			pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
		} else {
			const uint32_t pitch = surface->is_linear ?
				surface->u.gfx9.pitch[level] :
				surface->u.gfx9.surf_pitch;

			assert(util_is_power_of_two_nonzero(surface->bpe));
			pLayout->rowPitch = pitch * surface->bpe;
		}

		pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
		pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
		pLayout->size = surface->u.gfx9.surf_slice_size;
		if (image->type == VK_IMAGE_TYPE_3D)
			pLayout->size *= u_minify(image->info.depth, level);
	} else {
		/* slice_size is stored in dwords on pre-GFX9. */
		const uint64_t slice_size =
			(uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;

		pLayout->offset = plane->offset +
				  surface->u.legacy.level[level].offset +
				  slice_size * layer;
		pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
		pLayout->arrayPitch = slice_size;
		pLayout->depthPitch = slice_size;
		pLayout->size = slice_size;
		if (image->type == VK_IMAGE_TYPE_3D)
			pLayout->size *= u_minify(image->info.depth, level);
	}
}
1901
1902
1903 VkResult
radv_CreateImageView(VkDevice _device,const VkImageViewCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkImageView * pView)1904 radv_CreateImageView(VkDevice _device,
1905 const VkImageViewCreateInfo *pCreateInfo,
1906 const VkAllocationCallbacks *pAllocator,
1907 VkImageView *pView)
1908 {
1909 RADV_FROM_HANDLE(radv_device, device, _device);
1910 struct radv_image_view *view;
1911
1912 view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
1913 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1914 if (view == NULL)
1915 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1916
1917 vk_object_base_init(&device->vk, &view->base,
1918 VK_OBJECT_TYPE_IMAGE_VIEW);
1919
1920 radv_image_view_init(view, device, pCreateInfo, NULL);
1921
1922 *pView = radv_image_view_to_handle(view);
1923
1924 return VK_SUCCESS;
1925 }
1926
1927 void
radv_DestroyImageView(VkDevice _device,VkImageView _iview,const VkAllocationCallbacks * pAllocator)1928 radv_DestroyImageView(VkDevice _device, VkImageView _iview,
1929 const VkAllocationCallbacks *pAllocator)
1930 {
1931 RADV_FROM_HANDLE(radv_device, device, _device);
1932 RADV_FROM_HANDLE(radv_image_view, iview, _iview);
1933
1934 if (!iview)
1935 return;
1936
1937 vk_object_base_finish(&iview->base);
1938 vk_free2(&device->vk.alloc, pAllocator, iview);
1939 }
1940
radv_buffer_view_init(struct radv_buffer_view * view,struct radv_device * device,const VkBufferViewCreateInfo * pCreateInfo)1941 void radv_buffer_view_init(struct radv_buffer_view *view,
1942 struct radv_device *device,
1943 const VkBufferViewCreateInfo* pCreateInfo)
1944 {
1945 RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
1946
1947 view->bo = buffer->bo;
1948 view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
1949 buffer->size - pCreateInfo->offset : pCreateInfo->range;
1950 view->vk_format = pCreateInfo->format;
1951
1952 radv_make_buffer_descriptor(device, buffer, view->vk_format,
1953 pCreateInfo->offset, view->range, view->state);
1954 }
1955
1956 VkResult
radv_CreateBufferView(VkDevice _device,const VkBufferViewCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkBufferView * pView)1957 radv_CreateBufferView(VkDevice _device,
1958 const VkBufferViewCreateInfo *pCreateInfo,
1959 const VkAllocationCallbacks *pAllocator,
1960 VkBufferView *pView)
1961 {
1962 RADV_FROM_HANDLE(radv_device, device, _device);
1963 struct radv_buffer_view *view;
1964
1965 view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
1966 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1967 if (!view)
1968 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1969
1970 vk_object_base_init(&device->vk, &view->base,
1971 VK_OBJECT_TYPE_BUFFER_VIEW);
1972
1973 radv_buffer_view_init(view, device, pCreateInfo);
1974
1975 *pView = radv_buffer_view_to_handle(view);
1976
1977 return VK_SUCCESS;
1978 }
1979
1980 void
radv_DestroyBufferView(VkDevice _device,VkBufferView bufferView,const VkAllocationCallbacks * pAllocator)1981 radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
1982 const VkAllocationCallbacks *pAllocator)
1983 {
1984 RADV_FROM_HANDLE(radv_device, device, _device);
1985 RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);
1986
1987 if (!view)
1988 return;
1989
1990 vk_object_base_finish(&view->base);
1991 vk_free2(&device->vk.alloc, pAllocator, view);
1992 }
1993