• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  * SPDX-License-Identifier: MIT
5  *
6  * based in part on anv driver which is:
7  * Copyright © 2015 Intel Corporation
8  */
9 
10 #include "tu_image.h"
11 
12 #include "fdl/fd6_format_table.h"
13 
14 #include "util/u_debug.h"
15 #include "util/format/u_format.h"
16 #include "vk_util.h"
17 #include "drm-uapi/drm_fourcc.h"
18 
19 #include "tu_android.h"
20 #include "tu_cs.h"
21 #include "tu_descriptor_set.h"
22 #include "tu_device.h"
23 #include "tu_formats.h"
24 
25 uint32_t
tu6_plane_count(VkFormat format)26 tu6_plane_count(VkFormat format)
27 {
28    switch (format) {
29    default:
30       return 1;
31    case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
32    case VK_FORMAT_D32_SFLOAT_S8_UINT:
33       return 2;
34    case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
35       return 3;
36    }
37 }
38 
39 enum pipe_format
tu6_plane_format(VkFormat format,uint32_t plane)40 tu6_plane_format(VkFormat format, uint32_t plane)
41 {
42    switch (format) {
43    case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
44       return plane ? PIPE_FORMAT_R8G8_UNORM : PIPE_FORMAT_Y8_UNORM;
45    case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
46       return PIPE_FORMAT_R8_UNORM;
47    case VK_FORMAT_D32_SFLOAT_S8_UINT:
48       return plane ? PIPE_FORMAT_S8_UINT : PIPE_FORMAT_Z32_FLOAT;
49    default:
50       return tu_vk_format_to_pipe_format(format);
51    }
52 }
53 
54 uint32_t
tu6_plane_index(VkFormat format,VkImageAspectFlags aspect_mask)55 tu6_plane_index(VkFormat format, VkImageAspectFlags aspect_mask)
56 {
57    switch (aspect_mask) {
58    default:
59       assert(aspect_mask != VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT);
60       return 0;
61    case VK_IMAGE_ASPECT_PLANE_1_BIT:
62    case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
63       return 1;
64    case VK_IMAGE_ASPECT_PLANE_2_BIT:
65    case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
66       return 2;
67    case VK_IMAGE_ASPECT_STENCIL_BIT:
68       return format == VK_FORMAT_D32_SFLOAT_S8_UINT;
69    }
70 }
71 
72 enum pipe_format
tu_format_for_aspect(enum pipe_format format,VkImageAspectFlags aspect_mask)73 tu_format_for_aspect(enum pipe_format format, VkImageAspectFlags aspect_mask)
74 {
75    switch (format) {
76    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
77       /* VK_IMAGE_ASPECT_COLOR_BIT is used internally for blits (despite we
78        * also incorrectly advertise VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT for
79        * depth formats).  Return PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8 in
80        * this case.
81        *
82        * Otherwise, return the appropriate pipe format and let fdl6_view_init
83        * take care of the rest.
84        */
85       if (aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT)
86          return PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
87       if (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) {
88          if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
89             return PIPE_FORMAT_Z24_UNORM_S8_UINT;
90          else
91             return PIPE_FORMAT_X24S8_UINT;
92       } else {
93          return PIPE_FORMAT_Z24X8_UNORM;
94       }
95    case PIPE_FORMAT_Z24X8_UNORM:
96       if (aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT)
97          return PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
98       return PIPE_FORMAT_Z24X8_UNORM;
99    default:
100       return format;
101    }
102 }
103 
104 static bool
tu_is_r8g8(enum pipe_format format)105 tu_is_r8g8(enum pipe_format format)
106 {
107    return (util_format_get_blocksize(format) == 2) &&
108           (util_format_get_nr_components(format) == 2);
109 }
110 
111 static bool
tu_is_r8g8_compatible(enum pipe_format format)112 tu_is_r8g8_compatible(enum pipe_format format)
113 {
114    return (util_format_get_blocksize(format) == 2) &&
115           !util_format_is_depth_or_stencil(format);
116 }
117 
118 void
tu_cs_image_ref(struct tu_cs * cs,const struct fdl6_view * iview,uint32_t layer)119 tu_cs_image_ref(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer)
120 {
121    tu_cs_emit(cs, A6XX_RB_MRT_PITCH(0, iview->pitch).value);
122    tu_cs_emit(cs, iview->layer_size >> 6);
123    tu_cs_emit_qw(cs, iview->base_addr + iview->layer_size * layer);
124 }
125 
126 void
tu_cs_image_stencil_ref(struct tu_cs * cs,const struct tu_image_view * iview,uint32_t layer)127 tu_cs_image_stencil_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
128 {
129    tu_cs_emit(cs, A6XX_RB_STENCIL_BUFFER_PITCH(iview->stencil_pitch).value);
130    tu_cs_emit(cs, iview->stencil_layer_size >> 6);
131    tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer);
132 }
133 
134 void
tu_cs_image_depth_ref(struct tu_cs * cs,const struct tu_image_view * iview,uint32_t layer)135 tu_cs_image_depth_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
136 {
137    tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_PITCH(iview->depth_pitch).value);
138    tu_cs_emit(cs, iview->depth_layer_size >> 6);
139    tu_cs_emit_qw(cs, iview->depth_base_addr + iview->depth_layer_size * layer);
140 }
141 
142 template <chip CHIP>
143 void
tu_cs_image_ref_2d(struct tu_cs * cs,const struct fdl6_view * iview,uint32_t layer,bool src)144 tu_cs_image_ref_2d(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer, bool src)
145 {
146    tu_cs_emit_qw(cs, iview->base_addr + iview->layer_size * layer);
147    /* SP_PS_2D_SRC_PITCH has shifted pitch field */
148    if (src)
149       tu_cs_emit(cs, SP_PS_2D_SRC_PITCH(CHIP, .pitch = iview->pitch).value);
150    else
151       tu_cs_emit(cs, A6XX_RB_2D_DST_PITCH(iview->pitch).value);
152 }
153 TU_GENX(tu_cs_image_ref_2d);
154 
155 void
tu_cs_image_flag_ref(struct tu_cs * cs,const struct fdl6_view * iview,uint32_t layer)156 tu_cs_image_flag_ref(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer)
157 {
158    tu_cs_emit_qw(cs, iview->ubwc_addr + iview->ubwc_layer_size * layer);
159    tu_cs_emit(cs, iview->FLAG_BUFFER_PITCH);
160 }
161 
162 static void
tu_image_view_init(struct tu_device * device,struct tu_image_view * iview,const VkImageViewCreateInfo * pCreateInfo,bool has_z24uint_s8uint)163 tu_image_view_init(struct tu_device *device,
164                    struct tu_image_view *iview,
165                    const VkImageViewCreateInfo *pCreateInfo,
166                    bool has_z24uint_s8uint)
167 {
168    TU_FROM_HANDLE(tu_image, image, pCreateInfo->image);
169    const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
170    VkFormat vk_format = pCreateInfo->format;
171    VkImageAspectFlags aspect_mask = pCreateInfo->subresourceRange.aspectMask;
172 
173    const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
174       vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);
175    const struct tu_sampler_ycbcr_conversion *conversion = ycbcr_conversion ?
176       tu_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL;
177 
178    vk_image_view_init(&device->vk, &iview->vk, false, pCreateInfo);
179 
180    iview->image = image;
181 
182    const struct fdl_layout *layouts[3];
183 
184    layouts[0] = &image->layout[tu6_plane_index(image->vk.format, aspect_mask)];
185 
186    enum pipe_format format;
187    if (aspect_mask != VK_IMAGE_ASPECT_COLOR_BIT)
188       format = tu6_plane_format(vk_format, tu6_plane_index(vk_format, aspect_mask));
189    else
190       format = tu_vk_format_to_pipe_format(vk_format);
191 
192    if (image->vk.format == VK_FORMAT_G8_B8R8_2PLANE_420_UNORM &&
193        aspect_mask == VK_IMAGE_ASPECT_PLANE_0_BIT) {
194       if (vk_format == VK_FORMAT_R8_UNORM) {
195          /* The 0'th plane of this format has a different UBWC compression. */
196          format = PIPE_FORMAT_Y8_UNORM;
197       } else {
198          /* If the user wants to reinterpret this plane, then they should've
199           * set MUTABLE_FORMAT_BIT which should disable UBWC and tiling.
200           */
201          assert(!layouts[0]->ubwc);
202       }
203    }
204 
205    if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT &&
206        (vk_format == VK_FORMAT_G8_B8R8_2PLANE_420_UNORM ||
207         vk_format == VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM)) {
208       layouts[1] = &image->layout[1];
209       layouts[2] = &image->layout[2];
210    }
211 
212    vk_component_mapping_to_pipe_swizzle(pCreateInfo->components,
213                                         iview->swizzle);
214 
215    struct fdl_view_args args = {};
216    args.chip = device->physical_device->info->chip;
217    args.iova = image->iova;
218    args.base_array_layer = range->baseArrayLayer;
219    args.base_miplevel = range->baseMipLevel;
220    args.layer_count = vk_image_subresource_layer_count(&image->vk, range);
221    args.level_count = vk_image_subresource_level_count(&image->vk, range);
222    args.min_lod_clamp = iview->vk.min_lod;
223    args.format = tu_format_for_aspect(format, aspect_mask);
224    vk_component_mapping_to_pipe_swizzle(pCreateInfo->components, args.swiz);
225    if (conversion) {
226       unsigned char conversion_swiz[4], create_swiz[4];
227       memcpy(create_swiz, args.swiz, sizeof(create_swiz));
228       vk_component_mapping_to_pipe_swizzle(conversion->components,
229                                            conversion_swiz);
230       util_format_compose_swizzles(create_swiz, conversion_swiz, args.swiz);
231    }
232 
233    switch (pCreateInfo->viewType) {
234    case VK_IMAGE_VIEW_TYPE_1D:
235    case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
236       args.type = FDL_VIEW_TYPE_1D;
237       break;
238    case VK_IMAGE_VIEW_TYPE_2D:
239    case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
240       args.type = FDL_VIEW_TYPE_2D;
241       break;
242    case VK_IMAGE_VIEW_TYPE_CUBE:
243    case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
244       args.type = FDL_VIEW_TYPE_CUBE;
245       break;
246    case VK_IMAGE_VIEW_TYPE_3D:
247       args.type = FDL_VIEW_TYPE_3D;
248       break;
249    default:
250       unreachable("unknown view type");
251    }
252 
253    STATIC_ASSERT((unsigned)VK_CHROMA_LOCATION_COSITED_EVEN == (unsigned)FDL_CHROMA_LOCATION_COSITED_EVEN);
254    STATIC_ASSERT((unsigned)VK_CHROMA_LOCATION_MIDPOINT == (unsigned)FDL_CHROMA_LOCATION_MIDPOINT);
255    if (conversion) {
256       args.chroma_offsets[0] = (enum fdl_chroma_location) conversion->chroma_offsets[0];
257       args.chroma_offsets[1] = (enum fdl_chroma_location) conversion->chroma_offsets[1];
258    }
259 
260    fdl6_view_init(&iview->view, layouts, &args, has_z24uint_s8uint);
261 
262    if (image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
263       struct fdl_layout *layout = &image->layout[0];
264       iview->depth_base_addr = image->iova +
265          fdl_surface_offset(layout, range->baseMipLevel, range->baseArrayLayer);
266       iview->depth_layer_size = fdl_layer_stride(layout, range->baseMipLevel);
267       iview->depth_pitch = fdl_pitch(layout, range->baseMipLevel);
268 
269       layout = &image->layout[1];
270       iview->stencil_base_addr = image->iova +
271          fdl_surface_offset(layout, range->baseMipLevel, range->baseArrayLayer);
272       iview->stencil_layer_size = fdl_layer_stride(layout, range->baseMipLevel);
273       iview->stencil_pitch = fdl_pitch(layout, range->baseMipLevel);
274    }
275 }
276 
277 bool
tiling_possible(VkFormat format)278 tiling_possible(VkFormat format)
279 {
280    if (format == VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM ||
281        format == VK_FORMAT_G8B8G8R8_422_UNORM ||
282        format == VK_FORMAT_B8G8R8G8_422_UNORM)
283       return false;
284 
285    return true;
286 }
287 
288 /* Checks if we should advertise UBWC support for the given usage.
289  *
290  * Used by both vkCreateImage and vkGetPhysicalDeviceFormatProperties2, so the
291  * logical tu_device may be NULL.
292  */
293 bool
ubwc_possible(struct tu_device * device,VkFormat format,VkImageType type,VkImageUsageFlags usage,VkImageUsageFlags stencil_usage,const struct fd_dev_info * info,VkSampleCountFlagBits samples,bool use_z24uint_s8uint)294 ubwc_possible(struct tu_device *device,
295               VkFormat format,
296               VkImageType type,
297               VkImageUsageFlags usage,
298               VkImageUsageFlags stencil_usage,
299               const struct fd_dev_info *info,
300               VkSampleCountFlagBits samples,
301               bool use_z24uint_s8uint)
302 {
303    /* no UBWC with compressed formats, E5B9G9R9, S8_UINT
304     * (S8_UINT because separate stencil doesn't have UBWC-enable bit)
305     */
306    if (vk_format_is_compressed(format) ||
307        format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 ||
308        format == VK_FORMAT_S8_UINT)
309       return false;
310 
311    /* In copy_format, we treat snorm as unorm to avoid clamping.  But snorm
312     * and unorm are UBWC incompatible for special values such as all 0's or
313     * all 1's prior to a740.  Disable UBWC for snorm.
314     */
315    if (vk_format_is_snorm(format) &&
316        !info->a7xx.ubwc_unorm_snorm_int_compatible)
317       return false;
318 
319    if (!info->a6xx.has_8bpp_ubwc &&
320        vk_format_get_blocksizebits(format) == 8 &&
321        tu6_plane_count(format) == 1)
322       return false;
323 
324    if (type == VK_IMAGE_TYPE_3D) {
325       if (device) {
326          perf_debug(device,
327                     "Disabling UBWC for %s 3D image, but it should be "
328                     "possible to support.",
329                     util_format_name(vk_format_to_pipe_format(format)));
330       }
331       return false;
332    }
333 
334    /* Disable UBWC for storage images when not supported.
335     *
336     * Prior to a7xx, storage images must be readonly or writeonly to use UBWC.
337     * Freedreno can determine when this isn't the case and decompress the
338     * image on-the-fly, but we don't know which image a binding corresponds to
339     * and we can't change the descriptor so we can't do this.
340     */
341    if (((usage | stencil_usage) & VK_IMAGE_USAGE_STORAGE_BIT) &&
342        !info->a6xx.supports_ibo_ubwc) {
343       return false;
344    }
345 
346    /* A690 seem to have broken UBWC for depth/stencil, it requires
347     * depth flushing where we cannot realistically place it, like between
348     * ordinary draw calls writing read/depth. WSL blob seem to use ubwc
349     * sometimes for depth/stencil.
350     */
351    if (info->a6xx.broken_ds_ubwc_quirk &&
352        vk_format_is_depth_or_stencil(format))
353       return false;
354 
355    /* Disable UBWC for D24S8 on A630 in some cases
356     *
357     * VK_IMAGE_ASPECT_STENCIL_BIT image view requires to be able to sample
358     * from the stencil component as UINT, however no format allows this
359     * on a630 (the special FMT6_Z24_UINT_S8_UINT format is missing)
360     *
361     * It must be sampled as FMT6_8_8_8_8_UINT, which is not UBWC-compatible
362     *
363     * If we wish to get the border colors correct without knowing the format
364     * when creating the sampler, we also have to use the A630 workaround.
365     */
366    if (!use_z24uint_s8uint &&
367        format == VK_FORMAT_D24_UNORM_S8_UINT &&
368        (stencil_usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))
369       return false;
370 
371    if (!info->a6xx.has_z24uint_s8uint &&
372        (format == VK_FORMAT_D24_UNORM_S8_UINT ||
373         format == VK_FORMAT_X8_D24_UNORM_PACK32) &&
374        samples > VK_SAMPLE_COUNT_1_BIT) {
375       return false;
376    }
377 
378    return true;
379 }
380 
381 /* R8G8 have a different block width/height and height alignment from other
382  * formats that would normally be compatible (like R16), and so if we are
383  * trying to, for example, sample R16 as R8G8 we need to demote to linear.
384  */
385 static bool
format_list_reinterprets_r8g8_r16(enum pipe_format format,const VkImageFormatListCreateInfo * fmt_list)386 format_list_reinterprets_r8g8_r16(enum pipe_format format, const VkImageFormatListCreateInfo *fmt_list)
387 {
388    /* Check if it's actually a 2-cpp color format. */
389    if (!tu_is_r8g8_compatible(format))
390       return false;
391 
392    /* If there's no format list, then the app may reinterpret to any compatible
393     * format.
394     */
395    if (!fmt_list || !fmt_list->viewFormatCount)
396       return true;
397 
398    bool has_r8g8 = false;
399    bool has_non_r8g8 = false;
400    for (uint32_t i = 0; i < fmt_list->viewFormatCount; i++) {
401       enum pipe_format format =
402          tu_vk_format_to_pipe_format(fmt_list->pViewFormats[i]);
403       if (tu_is_r8g8(format))
404          has_r8g8 = true;
405       else
406          has_non_r8g8 = true;
407    }
408    return has_r8g8 && has_non_r8g8;
409 }
410 
411 static bool
format_list_has_swaps(const VkImageFormatListCreateInfo * fmt_list)412 format_list_has_swaps(const VkImageFormatListCreateInfo *fmt_list)
413 {
414    /* If there's no format list, then the app may reinterpret to any compatible
415     * format, and presumably one would have the swap set.
416     */
417    if (!fmt_list || !fmt_list->viewFormatCount)
418       return true;
419 
420    for (uint32_t i = 0; i < fmt_list->viewFormatCount; i++) {
421       enum pipe_format format =
422          tu_vk_format_to_pipe_format(fmt_list->pViewFormats[i]);
423 
424       if (tu6_format_texture(format, TILE6_LINEAR).swap)
425          return true;
426    }
427    return false;
428 }
429 
430 static VkResult
tu_image_init(struct tu_device * device,struct tu_image * image,const VkImageCreateInfo * pCreateInfo,uint64_t modifier,const VkSubresourceLayout * plane_layouts)431 tu_image_init(struct tu_device *device, struct tu_image *image,
432               const VkImageCreateInfo *pCreateInfo, uint64_t modifier,
433               const VkSubresourceLayout *plane_layouts)
434 {
435    vk_image_init(&device->vk, &image->vk, pCreateInfo);
436    image->vk.drm_format_mod = modifier;
437 
438    enum a6xx_tile_mode tile_mode = TILE6_3;
439    bool ubwc_enabled = true;
440 
441    /* use linear tiling if requested */
442    if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR || modifier == DRM_FORMAT_MOD_LINEAR) {
443       tile_mode = TILE6_LINEAR;
444       ubwc_enabled = false;
445    }
446 
447    /* Force linear tiling for formats with "fake" optimalTilingFeatures */
448    if (!tiling_possible(image->vk.format)) {
449       tile_mode = TILE6_LINEAR;
450       ubwc_enabled = false;
451    }
452 
453    /* No sense in tiling a 1D image, you'd just waste space and cache locality. */
454    if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D) {
455       tile_mode = TILE6_LINEAR;
456       ubwc_enabled = false;
457    }
458 
459    /* Fragment density maps are sampled on the CPU and we don't support
460     * sampling tiled images on the CPU or UBWC at the moment.
461     */
462    if (pCreateInfo->usage & VK_IMAGE_USAGE_FRAGMENT_DENSITY_MAP_BIT_EXT) {
463       tile_mode = TILE6_LINEAR;
464       ubwc_enabled = false;
465    }
466 
467    enum pipe_format format =
468       tu_vk_format_to_pipe_format(image->vk.format);
469    /* Whether a view of the image with an R8G8 format could be made. */
470    bool has_r8g8 = tu_is_r8g8(format);
471 
472    if (ubwc_enabled &&
473        !ubwc_possible(device, image->vk.format, pCreateInfo->imageType,
474                       pCreateInfo->usage, image->vk.stencil_usage,
475                       device->physical_device->info, pCreateInfo->samples,
476                       device->use_z24uint_s8uint))
477       ubwc_enabled = false;
478 
479    /* Mutable images can be reinterpreted as any other compatible format.
480     * This is a problem with UBWC (compression for different formats is different),
481     * but also tiling ("swap" affects how tiled formats are stored in memory)
482     * Depth and stencil formats cannot be reintepreted as another format, and
483     * cannot be linear with sysmem rendering, so don't fall back for those.
484     *
485     * TODO:
486     * - if the fmt_list contains only formats which are swapped, but compatible
487     *   with each other (B8G8R8A8_UNORM and B8G8R8A8_UINT for example), then
488     *   tiling is still possible
489     * - figure out which UBWC compressions are compatible to keep it enabled
490     */
491    if ((pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) &&
492        !vk_format_is_depth_or_stencil(image->vk.format)) {
493       const VkImageFormatListCreateInfo *fmt_list =
494          vk_find_struct_const(pCreateInfo->pNext, IMAGE_FORMAT_LIST_CREATE_INFO);
495       if (!tu6_mutable_format_list_ubwc_compatible(device->physical_device->info,
496                                                    fmt_list)) {
497          if (ubwc_enabled) {
498             if (fmt_list && fmt_list->viewFormatCount == 2) {
499                perf_debug(
500                   device,
501                   "Disabling UBWC on %dx%d %s resource due to mutable formats "
502                   "(fmt list %s, %s)",
503                   image->vk.extent.width, image->vk.extent.height,
504                   util_format_name(vk_format_to_pipe_format(image->vk.format)),
505                   util_format_name(vk_format_to_pipe_format(fmt_list->pViewFormats[0])),
506                   util_format_name(vk_format_to_pipe_format(fmt_list->pViewFormats[1])));
507             } else {
508                perf_debug(
509                   device,
510                   "Disabling UBWC on %dx%d %s resource due to mutable formats "
511                   "(fmt list %s)",
512                   image->vk.extent.width, image->vk.extent.height,
513                   util_format_name(vk_format_to_pipe_format(image->vk.format)),
514                   fmt_list ? "present" : "missing");
515             }
516             ubwc_enabled = false;
517          }
518 
519          if (format_list_reinterprets_r8g8_r16(format, fmt_list) ||
520             format_list_has_swaps(fmt_list)) {
521             tile_mode = TILE6_LINEAR;
522          }
523       }
524    }
525 
526    /* expect UBWC enabled if we asked for it */
527    if (modifier == DRM_FORMAT_MOD_QCOM_COMPRESSED)
528       assert(ubwc_enabled);
529    else if (TU_DEBUG(NOUBWC))
530       ubwc_enabled = false;
531 
532    /* Non-UBWC tiled R8G8 is probably buggy since media formats are always
533     * either linear or UBWC. There is no simple test to reproduce the bug.
534     * However it was observed in the wild leading to an unrecoverable hang
535     * on a650/a660.
536     */
537    if (has_r8g8 && tile_mode == TILE6_3 && !ubwc_enabled) {
538       tile_mode = TILE6_LINEAR;
539    }
540 
541    for (uint32_t i = 0; i < tu6_plane_count(image->vk.format); i++) {
542       struct fdl_layout *layout = &image->layout[i];
543       enum pipe_format format = tu6_plane_format(image->vk.format, i);
544       uint32_t width0 = pCreateInfo->extent.width;
545       uint32_t height0 = pCreateInfo->extent.height;
546 
547       if (i > 0) {
548          switch (image->vk.format) {
549          case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
550          case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
551             /* half width/height on chroma planes */
552             width0 = (width0 + 1) >> 1;
553             height0 = (height0 + 1) >> 1;
554             break;
555          case VK_FORMAT_D32_SFLOAT_S8_UINT:
556             /* no UBWC for separate stencil */
557             ubwc_enabled = false;
558             break;
559          default:
560             break;
561          }
562       }
563 
564       struct fdl_explicit_layout plane_layout;
565 
566       if (plane_layouts) {
567          /* only expect simple 2D images for now */
568          if (pCreateInfo->mipLevels != 1 ||
569             pCreateInfo->arrayLayers != 1 ||
570             pCreateInfo->extent.depth != 1)
571             return vk_error(device, VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
572 
573          plane_layout.offset = plane_layouts[i].offset;
574          plane_layout.pitch = plane_layouts[i].rowPitch;
575          /* note: use plane_layouts[0].arrayPitch to support array formats */
576       }
577 
578       layout->tile_mode = tile_mode;
579       layout->ubwc = ubwc_enabled;
580 
581       if (!fdl6_layout(layout, format,
582                        pCreateInfo->samples,
583                        width0, height0,
584                        pCreateInfo->extent.depth,
585                        pCreateInfo->mipLevels,
586                        pCreateInfo->arrayLayers,
587                        pCreateInfo->imageType == VK_IMAGE_TYPE_3D,
588                        plane_layouts ? &plane_layout : NULL)) {
589          assert(plane_layouts); /* can only fail with explicit layout */
590          return vk_error(device, VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
591       }
592 
593       if (TU_DEBUG(LAYOUT))
594          fdl_dump_layout(layout);
595 
596       /* fdl6_layout can't take explicit offset without explicit pitch
597        * add offset manually for extra layouts for planes
598        */
599       if (!plane_layouts && i > 0) {
600          uint32_t offset = ALIGN_POT(image->total_size, 4096);
601          for (int i = 0; i < pCreateInfo->mipLevels; i++) {
602             layout->slices[i].offset += offset;
603             layout->ubwc_slices[i].offset += offset;
604          }
605          layout->size += offset;
606       }
607 
608       image->total_size = MAX2(image->total_size, layout->size);
609    }
610 
611    const struct util_format_description *desc = util_format_description(image->layout[0].format);
612    if (util_format_has_depth(desc) && device->use_lrz) {
613       /* Depth plane is the first one */
614       struct fdl_layout *layout = &image->layout[0];
615       unsigned width = layout->width0;
616       unsigned height = layout->height0;
617 
618       /* LRZ buffer is super-sampled */
619       switch (layout->nr_samples) {
620       case 4:
621          width *= 2;
622          FALLTHROUGH;
623       case 2:
624          height *= 2;
625          break;
626       default:
627          break;
628       }
629 
630       unsigned lrz_pitch  = align(DIV_ROUND_UP(width, 8), 32);
631       unsigned lrz_height = align(DIV_ROUND_UP(height, 8), 16);
632 
633       image->lrz_height = lrz_height;
634       image->lrz_pitch = lrz_pitch;
635       image->lrz_offset = image->total_size;
636       unsigned lrz_size = lrz_pitch * lrz_height * 2;
637       image->total_size += lrz_size;
638 
639       unsigned nblocksx = DIV_ROUND_UP(DIV_ROUND_UP(width, 8), 16);
640       unsigned nblocksy = DIV_ROUND_UP(DIV_ROUND_UP(height, 8), 4);
641 
642       /* Fast-clear buffer is 1bit/block */
643       image->lrz_fc_size = DIV_ROUND_UP(nblocksx * nblocksy, 8);
644 
645       /* Fast-clear buffer cannot be larger than 512 bytes (HW limitation) */
646       bool has_lrz_fc = image->lrz_fc_size <= 512 &&
647          device->physical_device->info->a6xx.enable_lrz_fast_clear &&
648          !TU_DEBUG(NOLRZFC);
649 
650       if (has_lrz_fc || device->physical_device->info->a6xx.has_lrz_dir_tracking) {
651          image->lrz_fc_offset = image->total_size;
652          image->total_size += 512;
653 
654          if (device->physical_device->info->a6xx.has_lrz_dir_tracking) {
655             /* Direction tracking uses 1 byte */
656             image->total_size += 1;
657             /* GRAS_LRZ_DEPTH_VIEW needs 5 bytes: 4 for view data and 1 for padding */
658             image->total_size += 5;
659          }
660       }
661 
662       if (!has_lrz_fc) {
663          image->lrz_fc_size = 0;
664       }
665    } else {
666       image->lrz_height = 0;
667    }
668 
669    return VK_SUCCESS;
670 }
671 
672 VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateImage(VkDevice _device,const VkImageCreateInfo * pCreateInfo,const VkAllocationCallbacks * alloc,VkImage * pImage)673 tu_CreateImage(VkDevice _device,
674                const VkImageCreateInfo *pCreateInfo,
675                const VkAllocationCallbacks *alloc,
676                VkImage *pImage)
677 {
678    uint64_t modifier = DRM_FORMAT_MOD_INVALID;
679    const VkSubresourceLayout *plane_layouts = NULL;
680 
681    TU_FROM_HANDLE(tu_device, device, _device);
682    struct tu_image *image = (struct tu_image *)
683       vk_object_zalloc(&device->vk, alloc, sizeof(*image), VK_OBJECT_TYPE_IMAGE);
684 
685    if (!image)
686       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
687 
688    if (pCreateInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
689       const VkImageDrmFormatModifierListCreateInfoEXT *mod_info =
690          vk_find_struct_const(pCreateInfo->pNext,
691                               IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
692       const VkImageDrmFormatModifierExplicitCreateInfoEXT *drm_explicit_info =
693          vk_find_struct_const(pCreateInfo->pNext,
694                               IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT);
695 
696       assert(mod_info || drm_explicit_info);
697 
698       if (mod_info) {
699          modifier = DRM_FORMAT_MOD_LINEAR;
700          for (unsigned i = 0; i < mod_info->drmFormatModifierCount; i++) {
701             if (mod_info->pDrmFormatModifiers[i] == DRM_FORMAT_MOD_QCOM_COMPRESSED)
702                modifier = DRM_FORMAT_MOD_QCOM_COMPRESSED;
703          }
704       } else {
705          modifier = drm_explicit_info->drmFormatModifier;
706          assert(modifier == DRM_FORMAT_MOD_LINEAR ||
707                 modifier == DRM_FORMAT_MOD_QCOM_COMPRESSED);
708          plane_layouts = drm_explicit_info->pPlaneLayouts;
709       }
710    } else {
711       const struct wsi_image_create_info *wsi_info =
712          vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
713       if (wsi_info && wsi_info->scanout)
714          modifier = DRM_FORMAT_MOD_LINEAR;
715    }
716 
717 #if DETECT_OS_ANDROID
718    const VkNativeBufferANDROID *gralloc_info =
719       vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
720    int dma_buf;
721    if (gralloc_info) {
722       VkResult result = tu_gralloc_info(device, gralloc_info, &dma_buf, &modifier);
723       if (result != VK_SUCCESS)
724          return result;
725    }
726 #endif
727 
728    VkResult result = tu_image_init(device, image, pCreateInfo, modifier,
729                                    plane_layouts);
730    if (result != VK_SUCCESS) {
731       vk_object_free(&device->vk, alloc, image);
732       return result;
733    }
734 
735    *pImage = tu_image_to_handle(image);
736 
737 #if DETECT_OS_ANDROID
738    if (gralloc_info)
739       return tu_import_memory_from_gralloc_handle(_device, dma_buf, alloc,
740                                                   *pImage);
741 #endif
742    return VK_SUCCESS;
743 }
744 
745 VKAPI_ATTR void VKAPI_CALL
tu_DestroyImage(VkDevice _device,VkImage _image,const VkAllocationCallbacks * pAllocator)746 tu_DestroyImage(VkDevice _device,
747                 VkImage _image,
748                 const VkAllocationCallbacks *pAllocator)
749 {
750    TU_FROM_HANDLE(tu_device, device, _device);
751    TU_FROM_HANDLE(tu_image, image, _image);
752 
753    if (!image)
754       return;
755 
756 #if DETECT_OS_ANDROID
757    if (image->owned_memory != VK_NULL_HANDLE)
758       tu_FreeMemory(_device, image->owned_memory, pAllocator);
759 #endif
760 
761    vk_object_free(&device->vk, pAllocator, image);
762 }
763 
764 static void
tu_get_image_memory_requirements(struct tu_device * dev,struct tu_image * image,VkMemoryRequirements2 * pMemoryRequirements)765 tu_get_image_memory_requirements(struct tu_device *dev, struct tu_image *image,
766                                  VkMemoryRequirements2 *pMemoryRequirements)
767 {
768    pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
769       .size = image->total_size,
770       .alignment = image->layout[0].base_align,
771       .memoryTypeBits = (1 << dev->physical_device->memory.type_count) - 1,
772    };
773 
774    vk_foreach_struct(ext, pMemoryRequirements->pNext) {
775       switch (ext->sType) {
776       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
777          VkMemoryDedicatedRequirements *req =
778             (VkMemoryDedicatedRequirements *) ext;
779          req->requiresDedicatedAllocation =
780             image->vk.external_handle_types != 0;
781          req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
782          break;
783       }
784       default:
785          break;
786       }
787    }
788 }
789 
790 VKAPI_ATTR void VKAPI_CALL
tu_GetImageMemoryRequirements2(VkDevice _device,const VkImageMemoryRequirementsInfo2 * pInfo,VkMemoryRequirements2 * pMemoryRequirements)791 tu_GetImageMemoryRequirements2(VkDevice _device,
792                                const VkImageMemoryRequirementsInfo2 *pInfo,
793                                VkMemoryRequirements2 *pMemoryRequirements)
794 {
795    TU_FROM_HANDLE(tu_device, device, _device);
796    TU_FROM_HANDLE(tu_image, image, pInfo->image);
797 
798    tu_get_image_memory_requirements(device, image, pMemoryRequirements);
799 }
800 
801 VKAPI_ATTR void VKAPI_CALL
tu_GetImageSparseMemoryRequirements2(VkDevice device,const VkImageSparseMemoryRequirementsInfo2 * pInfo,uint32_t * pSparseMemoryRequirementCount,VkSparseImageMemoryRequirements2 * pSparseMemoryRequirements)802 tu_GetImageSparseMemoryRequirements2(
803    VkDevice device,
804    const VkImageSparseMemoryRequirementsInfo2 *pInfo,
805    uint32_t *pSparseMemoryRequirementCount,
806    VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
807 {
808    tu_stub();
809 }
810 
811 VKAPI_ATTR void VKAPI_CALL
tu_GetDeviceImageMemoryRequirements(VkDevice _device,const VkDeviceImageMemoryRequirements * pInfo,VkMemoryRequirements2 * pMemoryRequirements)812 tu_GetDeviceImageMemoryRequirements(
813    VkDevice _device,
814    const VkDeviceImageMemoryRequirements *pInfo,
815    VkMemoryRequirements2 *pMemoryRequirements)
816 {
817    TU_FROM_HANDLE(tu_device, device, _device);
818 
819    struct tu_image image = {0};
820 
821    tu_image_init(device, &image, pInfo->pCreateInfo, DRM_FORMAT_MOD_INVALID,
822                  NULL);
823 
824    tu_get_image_memory_requirements(device, &image, pMemoryRequirements);
825 }
826 
827 VKAPI_ATTR void VKAPI_CALL
tu_GetDeviceImageSparseMemoryRequirements(VkDevice device,const VkDeviceImageMemoryRequirements * pInfo,uint32_t * pSparseMemoryRequirementCount,VkSparseImageMemoryRequirements2 * pSparseMemoryRequirements)828 tu_GetDeviceImageSparseMemoryRequirements(
829     VkDevice device,
830     const VkDeviceImageMemoryRequirements *pInfo,
831     uint32_t *pSparseMemoryRequirementCount,
832     VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
833 {
834    tu_stub();
835 }
836 
837 static void
tu_get_image_subresource_layout(struct tu_image * image,const VkImageSubresource2KHR * pSubresource,VkSubresourceLayout2KHR * pLayout)838 tu_get_image_subresource_layout(struct tu_image *image,
839                                 const VkImageSubresource2KHR *pSubresource,
840                                 VkSubresourceLayout2KHR *pLayout)
841 {
842    struct fdl_layout *layout =
843       &image->layout[tu6_plane_index(image->vk.format,
844                                      pSubresource->imageSubresource.aspectMask)];
845    const struct fdl_slice *slice = layout->slices +
846       pSubresource->imageSubresource.mipLevel;
847 
848    pLayout->subresourceLayout.offset =
849       fdl_surface_offset(layout, pSubresource->imageSubresource.mipLevel,
850                          pSubresource->imageSubresource.arrayLayer);
851    pLayout->subresourceLayout.rowPitch =
852       fdl_pitch(layout, pSubresource->imageSubresource.mipLevel);
853    pLayout->subresourceLayout.arrayPitch =
854       fdl_layer_stride(layout, pSubresource->imageSubresource.mipLevel);
855    pLayout->subresourceLayout.depthPitch = slice->size0;
856    pLayout->subresourceLayout.size = slice->size0 * layout->depth0;
857 
858    if (fdl_ubwc_enabled(layout, pSubresource->imageSubresource.mipLevel)) {
859       /* UBWC starts at offset 0 */
860       pLayout->subresourceLayout.offset = 0;
861       /* UBWC scanout won't match what the kernel wants if we have levels/layers */
862       assert(image->vk.mip_levels == 1 && image->vk.array_layers == 1);
863    }
864 }
865 
866 VKAPI_ATTR void VKAPI_CALL
tu_GetImageSubresourceLayout2KHR(VkDevice _device,VkImage _image,const VkImageSubresource2KHR * pSubresource,VkSubresourceLayout2KHR * pLayout)867 tu_GetImageSubresourceLayout2KHR(VkDevice _device,
868                                  VkImage _image,
869                                  const VkImageSubresource2KHR *pSubresource,
870                                  VkSubresourceLayout2KHR *pLayout)
871 {
872    TU_FROM_HANDLE(tu_image, image, _image);
873 
874    tu_get_image_subresource_layout(image, pSubresource, pLayout);
875 }
876 
877 VKAPI_ATTR void VKAPI_CALL
tu_GetDeviceImageSubresourceLayoutKHR(VkDevice _device,const VkDeviceImageSubresourceInfoKHR * pInfo,VkSubresourceLayout2KHR * pLayout)878 tu_GetDeviceImageSubresourceLayoutKHR(VkDevice _device,
879                                       const VkDeviceImageSubresourceInfoKHR *pInfo,
880                                       VkSubresourceLayout2KHR *pLayout)
881 {
882    TU_FROM_HANDLE(tu_device, device, _device);
883 
884    struct tu_image image = {0};
885 
886    tu_image_init(device, &image, pInfo->pCreateInfo, DRM_FORMAT_MOD_INVALID,
887                  NULL);
888 
889    tu_get_image_subresource_layout(&image, pInfo->pSubresource, pLayout);
890 }
891 
892 VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateImageView(VkDevice _device,const VkImageViewCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkImageView * pView)893 tu_CreateImageView(VkDevice _device,
894                    const VkImageViewCreateInfo *pCreateInfo,
895                    const VkAllocationCallbacks *pAllocator,
896                    VkImageView *pView)
897 {
898    TU_FROM_HANDLE(tu_device, device, _device);
899    struct tu_image_view *view;
900 
901    view = (struct tu_image_view *) vk_object_alloc(
902       &device->vk, pAllocator, sizeof(*view), VK_OBJECT_TYPE_IMAGE_VIEW);
903    if (view == NULL)
904       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
905 
906    tu_image_view_init(device, view, pCreateInfo, device->use_z24uint_s8uint);
907 
908    *pView = tu_image_view_to_handle(view);
909 
910    return VK_SUCCESS;
911 }
912 
913 VKAPI_ATTR void VKAPI_CALL
tu_DestroyImageView(VkDevice _device,VkImageView _iview,const VkAllocationCallbacks * pAllocator)914 tu_DestroyImageView(VkDevice _device,
915                     VkImageView _iview,
916                     const VkAllocationCallbacks *pAllocator)
917 {
918    TU_FROM_HANDLE(tu_device, device, _device);
919    TU_FROM_HANDLE(tu_image_view, iview, _iview);
920 
921    if (!iview)
922       return;
923 
924    vk_object_free(&device->vk, pAllocator, iview);
925 }
926 
927 void
tu_buffer_view_init(struct tu_buffer_view * view,struct tu_device * device,const VkBufferViewCreateInfo * pCreateInfo)928 tu_buffer_view_init(struct tu_buffer_view *view,
929                     struct tu_device *device,
930                     const VkBufferViewCreateInfo *pCreateInfo)
931 {
932    TU_FROM_HANDLE(tu_buffer, buffer, pCreateInfo->buffer);
933 
934    view->buffer = buffer;
935 
936    uint32_t range = vk_buffer_range(&buffer->vk, pCreateInfo->offset,
937          pCreateInfo->range);
938    uint8_t swiz[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
939                        PIPE_SWIZZLE_W };
940 
941    fdl6_buffer_view_init(
942       view->descriptor, tu_vk_format_to_pipe_format(pCreateInfo->format),
943       swiz, buffer->iova + pCreateInfo->offset, range);
944 }
945 
946 VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateBufferView(VkDevice _device,const VkBufferViewCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkBufferView * pView)947 tu_CreateBufferView(VkDevice _device,
948                     const VkBufferViewCreateInfo *pCreateInfo,
949                     const VkAllocationCallbacks *pAllocator,
950                     VkBufferView *pView)
951 {
952    TU_FROM_HANDLE(tu_device, device, _device);
953    struct tu_buffer_view *view;
954 
955    view = (struct tu_buffer_view *) vk_object_alloc(
956       &device->vk, pAllocator, sizeof(*view), VK_OBJECT_TYPE_BUFFER_VIEW);
957    if (!view)
958       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
959 
960    tu_buffer_view_init(view, device, pCreateInfo);
961 
962    *pView = tu_buffer_view_to_handle(view);
963 
964    return VK_SUCCESS;
965 }
966 
967 VKAPI_ATTR void VKAPI_CALL
tu_DestroyBufferView(VkDevice _device,VkBufferView bufferView,const VkAllocationCallbacks * pAllocator)968 tu_DestroyBufferView(VkDevice _device,
969                      VkBufferView bufferView,
970                      const VkAllocationCallbacks *pAllocator)
971 {
972    TU_FROM_HANDLE(tu_device, device, _device);
973    TU_FROM_HANDLE(tu_buffer_view, view, bufferView);
974 
975    if (!view)
976       return;
977 
978    vk_object_free(&device->vk, pAllocator, view);
979 }
980 
981 /* Impelements the operations described in "Fragment Density Map Operations."
982  */
983 void
tu_fragment_density_map_sample(const struct tu_image_view * fdm,uint32_t x,uint32_t y,uint32_t width,uint32_t height,uint32_t layers,struct tu_frag_area * areas)984 tu_fragment_density_map_sample(const struct tu_image_view *fdm,
985                                uint32_t x, uint32_t y,
986                                uint32_t width, uint32_t height,
987                                uint32_t layers,
988                                struct tu_frag_area *areas)
989 {
990    assert(fdm->image->layout[0].tile_mode == TILE6_LINEAR);
991 
992    uint32_t fdm_shift_x = util_logbase2_ceil(DIV_ROUND_UP(width, fdm->vk.extent.width));
993    uint32_t fdm_shift_y = util_logbase2_ceil(DIV_ROUND_UP(height, fdm->vk.extent.height));
994 
995    fdm_shift_x = CLAMP(fdm_shift_x, MIN_FDM_TEXEL_SIZE_LOG2, MAX_FDM_TEXEL_SIZE_LOG2);
996    fdm_shift_y = CLAMP(fdm_shift_y, MIN_FDM_TEXEL_SIZE_LOG2, MAX_FDM_TEXEL_SIZE_LOG2);
997 
998    uint32_t i = x >> fdm_shift_x;
999    uint32_t j = y >> fdm_shift_y;
1000 
1001    unsigned cpp = fdm->image->layout[0].cpp;
1002    unsigned pitch = fdm->view.pitch;
1003 
1004    void *pixel = (char *)fdm->image->map + fdm->view.offset + cpp * i + pitch * j;
1005    for (unsigned i = 0; i < layers; i++) {
1006       float density_src[4], density[4];
1007       util_format_unpack_rgba(fdm->view.format, density_src, pixel, 1);
1008       pipe_swizzle_4f(density, density_src, fdm->swizzle);
1009       areas[i].width = 1.0f / density[0];
1010       areas[i].height = 1.0f / density[1];
1011 
1012       pixel = (char *)pixel + fdm->view.layer_size;
1013    }
1014 }
1015