/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "v3dv_meta_common.h"

#include "compiler/nir/nir_builder.h"
#include "util/u_pack_color.h"
#include "vulkan/runtime/vk_common_entrypoints.h"

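/* Hash/compare callbacks for the meta blit pipeline cache. Keys are the
 * fixed-size blobs of V3DV_META_BLIT_CACHE_KEY_SIZE bytes that identify a
 * cached blit pipeline variant.
 */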
static uint32_t
meta_blit_key_hash(const void *key)
{
   return _mesa_hash_data(key, V3DV_META_BLIT_CACHE_KEY_SIZE);
}

static bool
meta_blit_key_compare(const void *key1, const void *key2)
{
   return memcmp(key1, key2, V3DV_META_BLIT_CACHE_KEY_SIZE) == 0;
}

static bool
texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
                         VkImageAspectFlags aspect,
                         struct v3dv_image *image,
                         VkFormat dst_format,
                         VkFormat src_format,
                         struct v3dv_buffer *buffer,
                         uint32_t buffer_bpp,
                         VkColorComponentFlags cmask,
                         VkComponentMapping *cswizzle,
                         uint32_t region_count,
                         const VkBufferImageCopy2 *regions);

static bool
create_blit_pipeline_layout(struct v3dv_device *device,
                            VkDescriptorSetLayout *descriptor_set_layout,
                            VkPipelineLayout *pipeline_layout)
{
   VkResult result;

   if (*descriptor_set_layout == 0) {
      VkDescriptorSetLayoutBinding descriptor_set_layout_binding = {
         .binding = 0,
         .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
      };
      VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
         .bindingCount = 1,
         .pBindings = &descriptor_set_layout_binding,
      };
      result =
         v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
                                        &descriptor_set_layout_info,
                                        &device->vk.alloc,
                                        descriptor_set_layout);
      if (result != VK_SUCCESS)
         return false;
   }

   assert(*pipeline_layout == 0);
   VkPipelineLayoutCreateInfo pipeline_layout_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = descriptor_set_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges =
         &(VkPushConstantRange) { VK_SHADER_STAGE_VERTEX_BIT, 0, 20 },
   };

   result =
      v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
                                &pipeline_layout_info,
                                &device->vk.alloc,
                                pipeline_layout);
   return result == VK_SUCCESS;
}

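/* Creates the hash tables that back the blit pipeline cache, together with
 * the descriptor set layout and pipeline layout shared by all cached blit
 * pipelines.
 */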
void
v3dv_meta_blit_init(struct v3dv_device *device)
{
   for (uint32_t i = 0; i < 3; i++) {
      device->meta.blit.cache[i] =
         _mesa_hash_table_create(NULL,
                                 meta_blit_key_hash,
                                 meta_blit_key_compare);
   }

   create_blit_pipeline_layout(device,
                               &device->meta.blit.ds_layout,
                               &device->meta.blit.p_layout);
}

void
v3dv_meta_blit_finish(struct v3dv_device *device)
{
   VkDevice _device = v3dv_device_to_handle(device);

   for (uint32_t i = 0; i < 3; i++) {
      hash_table_foreach(device->meta.blit.cache[i], entry) {
         struct v3dv_meta_blit_pipeline *item = entry->data;
         v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);
         vk_free(&device->vk.alloc, item);
      }
      _mesa_hash_table_destroy(device->meta.blit.cache[i], NULL);
   }

   if (device->meta.blit.p_layout) {
      v3dv_DestroyPipelineLayout(_device, device->meta.blit.p_layout,
                                 &device->vk.alloc);
   }

   if (device->meta.blit.ds_layout) {
      v3dv_DestroyDescriptorSetLayout(_device, device->meta.blit.ds_layout,
                                      &device->vk.alloc);
   }
}

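/* Hash/compare callbacks for the texel buffer copy pipeline cache, mirroring
 * the blit cache helpers above but keyed on
 * V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE bytes.
 */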
static uint32_t
meta_texel_buffer_copy_key_hash(const void *key)
{
   return _mesa_hash_data(key, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
}

static bool
meta_texel_buffer_copy_key_compare(const void *key1, const void *key2)
{
   return memcmp(key1, key2, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE) == 0;
}

static bool
create_texel_buffer_copy_pipeline_layout(struct v3dv_device *device,
                                         VkDescriptorSetLayout *ds_layout,
                                         VkPipelineLayout *p_layout)
{
   VkResult result;

   if (*ds_layout == 0) {
      VkDescriptorSetLayoutBinding ds_layout_binding = {
         .binding = 0,
         .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
      };
      VkDescriptorSetLayoutCreateInfo ds_layout_info = {
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
         .bindingCount = 1,
         .pBindings = &ds_layout_binding,
      };
      result =
         v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
                                        &ds_layout_info,
                                        &device->vk.alloc,
                                        ds_layout);
      if (result != VK_SUCCESS)
         return false;
   }

   assert(*p_layout == 0);
   /* FIXME: this is abusing the API a bit, since not all of our copy
    * pipelines have a geometry shader. We could create 2 different pipeline
    * layouts, but this works for us for now.
    */
#define TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET      0
#define TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET  16
#define TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET  20
#define TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET   24
   VkPushConstantRange ranges[2] = {
      { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 24 },
      { VK_SHADER_STAGE_GEOMETRY_BIT, 24, 4 },
   };

   VkPipelineLayoutCreateInfo p_layout_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = ds_layout,
      .pushConstantRangeCount = 2,
      .pPushConstantRanges = ranges,
   };

   result =
      v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
                                &p_layout_info,
                                &device->vk.alloc,
                                p_layout);
   return result == VK_SUCCESS;
}

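/* Creates the hash tables that back the texel buffer copy pipeline cache and
 * the layouts shared by all cached texel buffer copy pipelines.
 */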
void
v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device)
{
   for (uint32_t i = 0; i < 3; i++) {
      device->meta.texel_buffer_copy.cache[i] =
         _mesa_hash_table_create(NULL,
                                 meta_texel_buffer_copy_key_hash,
                                 meta_texel_buffer_copy_key_compare);
   }

   create_texel_buffer_copy_pipeline_layout(
      device,
      &device->meta.texel_buffer_copy.ds_layout,
      &device->meta.texel_buffer_copy.p_layout);
}

void
v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device)
{
   VkDevice _device = v3dv_device_to_handle(device);

   for (uint32_t i = 0; i < 3; i++) {
      hash_table_foreach(device->meta.texel_buffer_copy.cache[i], entry) {
         struct v3dv_meta_texel_buffer_copy_pipeline *item = entry->data;
         v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);
         vk_free(&device->vk.alloc, item);
      }
      _mesa_hash_table_destroy(device->meta.texel_buffer_copy.cache[i], NULL);
   }

   if (device->meta.texel_buffer_copy.p_layout) {
      v3dv_DestroyPipelineLayout(_device, device->meta.texel_buffer_copy.p_layout,
                                 &device->vk.alloc);
   }

   if (device->meta.texel_buffer_copy.ds_layout) {
      v3dv_DestroyDescriptorSetLayout(_device, device->meta.texel_buffer_copy.ds_layout,
                                      &device->vk.alloc);
   }
}

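/* Returns a TLB-renderable format with the same per-texel (or per-block) size
 * as the given format, so raw copies can reinterpret the data without a
 * format conversion, or VK_FORMAT_UNDEFINED if no such format is available.
 */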
static VkFormat
get_compatible_tlb_format(VkFormat format)
{
   switch (format) {
   case VK_FORMAT_R8G8B8A8_SNORM:
      return VK_FORMAT_R8G8B8A8_UINT;

   case VK_FORMAT_R8G8_SNORM:
      return VK_FORMAT_R8G8_UINT;

   case VK_FORMAT_R8_SNORM:
      return VK_FORMAT_R8_UINT;

   case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
      return VK_FORMAT_A8B8G8R8_UINT_PACK32;

   case VK_FORMAT_R16_UNORM:
   case VK_FORMAT_R16_SNORM:
      return VK_FORMAT_R16_UINT;

   case VK_FORMAT_R16G16_UNORM:
   case VK_FORMAT_R16G16_SNORM:
      return VK_FORMAT_R16G16_UINT;

   case VK_FORMAT_R16G16B16A16_UNORM:
   case VK_FORMAT_R16G16B16A16_SNORM:
      return VK_FORMAT_R16G16B16A16_UINT;

   case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
      return VK_FORMAT_R32_SFLOAT;

   /* We can't render to compressed formats using the TLB so instead we use
    * a compatible format with the same bpp as the compressed format. Because
    * the compressed format's bpp is for a full block (i.e. 4x4 pixels in the
    * case of ETC), when we implement copies with the compatible format we
    * will have to divide offsets and dimensions on the compressed image by
    * the compressed block size.
    */
   case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
   case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
   case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
   case VK_FORMAT_BC2_UNORM_BLOCK:
   case VK_FORMAT_BC2_SRGB_BLOCK:
   case VK_FORMAT_BC3_SRGB_BLOCK:
   case VK_FORMAT_BC3_UNORM_BLOCK:
   case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
   case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
   case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
   case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
   case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
   case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
   case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
   case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
   case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
   case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
   case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
   case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
   case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
   case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
      return VK_FORMAT_R32G32B32A32_UINT;

   case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
   case VK_FORMAT_EAC_R11_UNORM_BLOCK:
   case VK_FORMAT_EAC_R11_SNORM_BLOCK:
   case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
   case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
   case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
   case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
      return VK_FORMAT_R16G16B16A16_UINT;

   default:
      return VK_FORMAT_UNDEFINED;
   }
}

/**
 * Checks if we can implement an image copy or clear operation using the TLB
 * hardware.
 *
 * The extent and miplevel are only used to validate tile stores (to match the
 * region to store against the miplevel dimensions to avoid cases where
 * the region to store is not aligned to tile boundaries). If extent is
 * NULL no checks are done (which is fine if the image will only be used for a
 * TLB load or when we know in advance that the store will be for the entire
 * size of the image miplevel).
 *
 * For TLB copies we are doing a per-plane copy, so for multi-plane formats,
 * the compatible format will be single-plane.
 */
bool
v3dv_meta_can_use_tlb(struct v3dv_image *image,
                      uint8_t plane,
                      uint8_t miplevel,
                      const VkOffset3D *offset,
                      const VkExtent3D *extent,
                      VkFormat *compat_format)
{
   if (offset->x != 0 || offset->y != 0)
      return false;

   /* FIXME: this is suboptimal, what we really want to check is that the
    * extent of the region to copy is the full slice or a multiple of the
    * tile size.
    */
   if (extent) {
      struct v3d_resource_slice *slice = &image->planes[plane].slices[miplevel];
      if (slice->width != extent->width || slice->height != extent->height)
         return false;
   }

   if (image->format->planes[plane].rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) {
      if (compat_format)
         *compat_format = image->planes[plane].vk_format;
      return true;
   }

   /* If the image format is not TLB-supported, then check if we can use
    * a compatible format instead.
    */
   if (compat_format) {
      *compat_format = get_compatible_tlb_format(image->planes[plane].vk_format);
      if (*compat_format != VK_FORMAT_UNDEFINED) {
         assert(vk_format_get_plane_count(*compat_format) == 1);
         return true;
      }
   }

   return false;
}

/* Implements a copy using the TLB.
 *
 * This only works if we are copying from offset (0,0), since a TLB store for
 * tile (x,y) will be written at the same tile offset into the destination.
 * When this requirement is not met, we need to use a blit instead.
 *
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_buffer *buffer,
                         struct v3dv_image *image,
                         const VkBufferImageCopy2 *region)
{
   VkFormat fb_format;
   uint8_t plane = v3dv_plane_from_aspect(region->imageSubresource.aspectMask);
   assert(plane < image->plane_count);

   if (!v3dv_meta_can_use_tlb(image, plane, region->imageSubresource.mipLevel,
                              &region->imageOffset, &region->imageExtent,
                              &fb_format)) {
      return false;
   }

   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, region->imageSubresource.aspectMask,
       &internal_type, &internal_bpp);

   uint32_t num_layers;
   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
   if (!job)
      return true;

   /* Handle copy from compressed format using a compatible format */
   const uint32_t block_w =
      vk_format_get_blockwidth(image->planes[plane].vk_format);
   const uint32_t block_h =
      vk_format_get_blockheight(image->planes[plane].vk_format);
   const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
   const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);

   v3dv_job_start_frame(job, width, height, num_layers, false, true, 1,
                        internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
                        false);

   struct v3dv_meta_framebuffer framebuffer;
   v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
                                              internal_type, &job->frame_tiling);

   v3dv_X(job->device, job_emit_binning_flush)(job);
   v3dv_X(job->device, meta_emit_copy_image_to_buffer_rcl)
      (job, buffer, image, &framebuffer, region);

   v3dv_cmd_buffer_finish_job(cmd_buffer);

   return true;
}

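/* Forward declaration of the shader-based blit fallback defined later in
 * this file; the copy paths below use it when the TLB path cannot handle
 * a copy.
 */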
static bool
blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
            struct v3dv_image *dst,
            VkFormat dst_format,
            struct v3dv_image *src,
            VkFormat src_format,
            VkColorComponentFlags cmask,
            VkComponentMapping *cswizzle,
            const VkImageBlit2 *region,
            VkFilter filter,
            bool dst_is_padded_image);


/**
 * A structure that contains all the information we may need in various
 * processes involving image to buffer copies implemented with blit paths.
 */
struct image_to_buffer_info {
   /* Source image info */
   VkFormat src_format;
   uint8_t plane;
   VkColorComponentFlags cmask;
   VkComponentMapping cswizzle;
   VkImageAspectFlags src_copy_aspect;
   uint32_t block_width;
   uint32_t block_height;

   /* Destination buffer info */
   VkFormat dst_format;
   uint32_t buf_width;
   uint32_t buf_height;
   uint32_t buf_bpp;
   VkImageAspectFlags dst_copy_aspect;
};

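/* Builds the VkImageBlit2 region used to copy one layer of an image to a
 * buffer through the blit path, expressing offsets and extents in block
 * units so compressed sources work with an uncompressed view.
 */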
static VkImageBlit2
blit_region_for_image_to_buffer(const VkOffset3D *offset,
                                const VkExtent3D *extent,
                                uint32_t mip_level,
                                uint32_t base_layer,
                                uint32_t layer_offset,
                                struct image_to_buffer_info *info)
{
   VkImageBlit2 output = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
      .srcSubresource = {
         .aspectMask = info->src_copy_aspect,
         .mipLevel = mip_level,
         .baseArrayLayer = base_layer + layer_offset,
         .layerCount = 1,
      },
      .srcOffsets = {
         {
            DIV_ROUND_UP(offset->x, info->block_width),
            DIV_ROUND_UP(offset->y, info->block_height),
            offset->z + layer_offset,
         },
         {
            DIV_ROUND_UP(offset->x + extent->width, info->block_width),
            DIV_ROUND_UP(offset->y + extent->height, info->block_height),
            offset->z + layer_offset + 1,
         },
      },
      .dstSubresource = {
         .aspectMask = info->dst_copy_aspect,
         .mipLevel = 0,
         .baseArrayLayer = 0,
         .layerCount = 1,
      },
      .dstOffsets = {
         { 0, 0, 0 },
         {
            DIV_ROUND_UP(extent->width, info->block_width),
            DIV_ROUND_UP(extent->height, info->block_height),
            1
         },
      },
   };

   return output;
}

/**
 * Produces an image_to_buffer_info struct from a VkBufferImageCopy2 that we
 * can use to implement image to buffer copies with blit paths.
 *
 * Returns false if the copy operation can't be implemented with a blit.
 */
static bool
gather_image_to_buffer_info(struct v3dv_cmd_buffer *cmd_buffer,
                            struct v3dv_image *image,
                            const VkBufferImageCopy2 *region,
                            struct image_to_buffer_info *out_info)
{
   bool supported = false;

   VkImageAspectFlags dst_copy_aspect = region->imageSubresource.aspectMask;
   /* For multi-planar images we copy one plane at a time using an image alias
    * with a color aspect for each plane.
    */
   if (image->plane_count > 1)
      dst_copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;

   VkImageAspectFlags src_copy_aspect = region->imageSubresource.aspectMask;
   uint8_t plane = v3dv_plane_from_aspect(src_copy_aspect);
   assert(plane < image->plane_count);

   /* Generally, the bpp of the data in the buffer matches that of the
    * source image. The exception is the case where we are copying the
    * stencil aspect (8bpp) out of a combined d24s8 image (32bpp).
    */
   uint32_t buffer_bpp = image->planes[plane].cpp;

   /* Because we are going to implement the copy as a blit, we need to create
    * a linear image from the destination buffer and we also want our blit
    * source and destination formats to be the same (to avoid any format
    * conversions), so we choose a canonical format that matches the
    * source image bpp.
    *
    * The exception to the above is copying from combined depth/stencil images
    * because we are copying only one aspect of the image, so we need to set up
    * our formats, color write mask and source swizzle mask to match that.
    */
   VkFormat dst_format;
   VkFormat src_format;
   VkColorComponentFlags cmask = 0; /* All components */
   VkComponentMapping cswizzle = {
      .r = VK_COMPONENT_SWIZZLE_IDENTITY,
      .g = VK_COMPONENT_SWIZZLE_IDENTITY,
      .b = VK_COMPONENT_SWIZZLE_IDENTITY,
      .a = VK_COMPONENT_SWIZZLE_IDENTITY,
   };
   switch (buffer_bpp) {
   case 16:
      assert(dst_copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      dst_format = VK_FORMAT_R32G32B32A32_UINT;
      src_format = dst_format;
      break;
   case 8:
      assert(dst_copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      dst_format = VK_FORMAT_R16G16B16A16_UINT;
      src_format = dst_format;
      break;
   case 4:
      switch (dst_copy_aspect) {
      case VK_IMAGE_ASPECT_COLOR_BIT:
         src_format = VK_FORMAT_R8G8B8A8_UINT;
         dst_format = VK_FORMAT_R8G8B8A8_UINT;
         break;
      case VK_IMAGE_ASPECT_DEPTH_BIT:
         assert(image->plane_count == 1);
         assert(image->vk.format == VK_FORMAT_D32_SFLOAT ||
                image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
                image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32);
         if (image->vk.format == VK_FORMAT_D32_SFLOAT) {
            src_format = VK_FORMAT_R32_UINT;
            dst_format = VK_FORMAT_R32_UINT;
         } else {
            /* We want to write depth in the buffer in the first 24-bits,
             * however, the hardware has depth in bits 8-31, so swizzle the
             * source components to match what we want. Also, we don't
             * want to write bits 24-31 in the destination.
             */
            src_format = VK_FORMAT_R8G8B8A8_UINT;
            dst_format = VK_FORMAT_R8G8B8A8_UINT;
            cmask = VK_COLOR_COMPONENT_R_BIT |
                    VK_COLOR_COMPONENT_G_BIT |
                    VK_COLOR_COMPONENT_B_BIT;
            cswizzle.r = VK_COMPONENT_SWIZZLE_G;
            cswizzle.g = VK_COMPONENT_SWIZZLE_B;
            cswizzle.b = VK_COMPONENT_SWIZZLE_A;
            cswizzle.a = VK_COMPONENT_SWIZZLE_ZERO;
         }
         break;
      case VK_IMAGE_ASPECT_STENCIL_BIT:
         assert(image->plane_count == 1);
         assert(dst_copy_aspect == VK_IMAGE_ASPECT_STENCIL_BIT);
         assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT);
         /* Copying from S8D24. We want to write 8-bit stencil values only,
          * so adjust the buffer bpp for that. Since the hardware stores stencil
          * in the LSB, we can just do a RGBA8UI to R8UI blit.
          */
         src_format = VK_FORMAT_R8G8B8A8_UINT;
         dst_format = VK_FORMAT_R8_UINT;
         buffer_bpp = 1;
         break;
      default:
         unreachable("unsupported aspect");
         return supported;
      };
      break;
   case 2:
      assert(dst_copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT ||
             dst_copy_aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
      dst_format = VK_FORMAT_R16_UINT;
      src_format = dst_format;
      break;
   case 1:
      assert(dst_copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      dst_format = VK_FORMAT_R8_UINT;
      src_format = dst_format;
      break;
   default:
      unreachable("unsupported bit-size");
      return supported;
   };

   /* The hardware doesn't support linear depth/stencil stores, so we
    * implement copies of depth/stencil aspect as color copies using a
    * compatible color format.
    */
   assert(vk_format_is_color(src_format));
   assert(vk_format_is_color(dst_format));
   dst_copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;

   /* We should be able to handle the blit if we got this far */
   supported = true;

   /* Obtain the 2D buffer region spec */
   uint32_t buf_width, buf_height;
   if (region->bufferRowLength == 0)
      buf_width = region->imageExtent.width;
   else
      buf_width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      buf_height = region->imageExtent.height;
   else
      buf_height = region->bufferImageHeight;

   /* If the image is compressed, the bpp refers to blocks, not pixels */
   uint32_t block_width =
      vk_format_get_blockwidth(image->planes[plane].vk_format);
   uint32_t block_height =
      vk_format_get_blockheight(image->planes[plane].vk_format);
   buf_width = DIV_ROUND_UP(buf_width, block_width);
   buf_height = DIV_ROUND_UP(buf_height, block_height);

   out_info->src_format = src_format;
   out_info->dst_format = dst_format;
   out_info->src_copy_aspect = src_copy_aspect;
   out_info->dst_copy_aspect = dst_copy_aspect;
   out_info->buf_width = buf_width;
   out_info->buf_height = buf_height;
   out_info->buf_bpp = buffer_bpp;
   out_info->block_width = block_width;
   out_info->block_height = block_height;
   out_info->cmask = cmask;
   out_info->cswizzle = cswizzle;
   out_info->plane = plane;

   return supported;
}

/* Creates a linear image to alias buffer memory. It also adds that image
 * as a private object to the cmd_buffer.
 *
 * This is used for cases where we want to implement an image to buffer copy,
 * but we need to rely on a mechanism that uses an image as destination, like
 * blitting.
 */
static VkResult
create_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_buffer *buffer,
                         const VkBufferImageCopy2 *region,
                         struct image_to_buffer_info *info,
                         uint32_t layer,
                         VkImage *out_image)
{
   VkImageCreateInfo image_info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
      .imageType = VK_IMAGE_TYPE_2D,
      .format = info->dst_format,
      .extent = { info->buf_width, info->buf_height, 1 },
      .mipLevels = 1,
      .arrayLayers = 1,
      .samples = VK_SAMPLE_COUNT_1_BIT,
      .tiling = VK_IMAGE_TILING_LINEAR,
      .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
      .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
      .queueFamilyIndexCount = 0,
      .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
   };

   VkResult result;
   struct v3dv_device *device = cmd_buffer->device;
   VkDevice _device = v3dv_device_to_handle(device);

   VkImage buffer_image;
   result =
      v3dv_CreateImage(_device, &image_info, &device->vk.alloc, &buffer_image);
   if (result != VK_SUCCESS)
      return result;

   *out_image = buffer_image;

   v3dv_cmd_buffer_add_private_obj(
      cmd_buffer, (uintptr_t)buffer_image,
      (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);

   /* Bind the buffer memory to the image */
   VkDeviceSize buffer_offset = buffer->mem_offset + region->bufferOffset +
      layer * info->buf_width * info->buf_height * info->buf_bpp;

   result =
      vk_common_BindImageMemory(_device, buffer_image,
                                v3dv_device_memory_to_handle(buffer->mem),
                                buffer_offset);
   return result;
}

/**
 * Creates an image with a single mip level that aliases the memory of a
 * mip level in another image, re-interpreting the memory with an uncompressed
 * format. The image is added to the command buffer as a private object for
 * disposal.
 */
static bool
create_image_mip_level_alias(struct v3dv_cmd_buffer *cmd_buffer,
                             struct v3dv_image *image,
                             VkFormat format,
                             uint32_t plane,
                             uint32_t mip_level,
                             uint32_t layer,
                             VkImage *alias)
{
   VkResult result;
   assert(!vk_format_is_compressed(format));

   struct v3dv_device *device = cmd_buffer->device;
   VkDevice vk_device = v3dv_device_to_handle(device);
   uint32_t mip_width = image->planes[plane].slices[mip_level].width;
   uint32_t mip_height = image->planes[plane].slices[mip_level].height;

   uint32_t block_width =
      vk_format_get_blockwidth(image->planes[plane].vk_format);
   uint32_t block_height =
      vk_format_get_blockheight(image->planes[plane].vk_format);

   VkImageCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
      .imageType = image->vk.image_type,
      .format = format,
      .extent = { DIV_ROUND_UP(mip_width, block_width),
                  DIV_ROUND_UP(mip_height, block_height),
                  1 },
      .mipLevels = 1,
      .arrayLayers = 1,
      .samples = image->vk.samples,
      .tiling = image->tiled ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR,
      .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
      .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
      .queueFamilyIndexCount = 0,
      .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
   };
   result = v3dv_CreateImage(vk_device, &info, &device->vk.alloc, alias);
   if (result != VK_SUCCESS)
      return false;

   /* The alias we have just created has just one mip, but we may be aliasing
    * any mip in the original image. Because the slice setup changes based on
    * the mip (particularly, for mips >= 2 it uses power of 2 sizes internally)
    * and this can influence the tiling layout selected for the slice, we want
    * to make sure we copy the slice description from the actual mip level in
    * the original image, and then rewrite any fields that we need for the
    * alias. Particularly, we want to make the offset 0 because we are going to
    * bind the underlying image memory exactly at the start of the selected mip.
    * We also want to relax the image alignment requirements to the minimum
    * (the one imposed by the Texture Base Address field) since we may not be
    * aliasing a level 0 (for which we typically want a page alignment for
    * optimal performance).
    */
   V3DV_FROM_HANDLE(v3dv_image, v3dv_alias, *alias);
   v3dv_alias->planes[plane].slices[0] = image->planes[plane].slices[mip_level];
   v3dv_alias->planes[plane].slices[0].width = info.extent.width;
   v3dv_alias->planes[plane].slices[0].height = info.extent.height;
   v3dv_alias->planes[plane].slices[0].offset = 0;
   v3dv_alias->planes[plane].alignment = 64;

   v3dv_cmd_buffer_add_private_obj(
      cmd_buffer, (uintptr_t)*alias,
      (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);

   result =
      vk_common_BindImageMemory(vk_device, *alias,
                                v3dv_device_memory_to_handle(image->planes[plane].mem),
                                v3dv_layer_offset(image, mip_level, layer, plane));
   return result == VK_SUCCESS;
}

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
                          struct v3dv_buffer *buffer,
                          struct v3dv_image *image,
                          const VkBufferImageCopy2 *region)
{
   bool handled = false;
   struct image_to_buffer_info info;

   /* This path uses a shader blit which doesn't support linear images. Return
    * early to avoid all the heavy lifting in preparation for the
    * blit_shader() call that is bound to fail in that scenario.
    */
   if (!image->tiled && image->vk.image_type != VK_IMAGE_TYPE_1D) {
      return handled;
   }

   handled = gather_image_to_buffer_info(cmd_buffer, image, region,
                                         &info);

   if (!handled)
      return handled;

   /* We should be able to handle the blit if we got this far */
   handled = true;

   /* Compute layers to copy */
   uint32_t num_layers;
   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

   /* Copy requested layers */
   VkResult result;
   VkImageBlit2 blit_region;
   uint32_t mip_level = region->imageSubresource.mipLevel;
   uint32_t base_layer = region->imageSubresource.baseArrayLayer;
   for (uint32_t i = 0; i < num_layers; i++) {
      uint32_t layer_offset = i;

      if (vk_format_is_compressed(image->vk.format)) {
         /* Our blit interface can see the real format of the images to detect
          * copies between compressed and uncompressed images and adapt the
          * blit region accordingly. Here we are just doing a raw copy of
          * compressed data, but we are passing an uncompressed view of the
          * buffer for the blit destination image (since compressed formats are
          * not renderable), so we also want to provide an uncompressed view of
          * the source image.
          *
          * It is important that we create the alias over the selected mip
          * level (instead of aliasing the entire image) because an uncompressed
          * view of the image won't have the same number of mip levels as the
          * original image and the implicit mip size calculations the hw will
          * do to sample from a non-zero mip level may not match exactly between
          * compressed and uncompressed views.
          */
         VkImage alias;
         if (!create_image_mip_level_alias(cmd_buffer, image, info.dst_format,
                                           info.plane, mip_level,
                                           base_layer + layer_offset,
                                           &alias)) {
            return handled;
         }

         /* We are aliasing the selected mip level and layer with a
          * single-mip and single-layer image.
          */
         image = v3dv_image_from_handle(alias);
         mip_level = 0;
         base_layer = 0;
         layer_offset = 0;
      }

      /* Create the destination blit image from the destination buffer */
      VkImage buffer_image;
      result =
         create_image_from_buffer(cmd_buffer, buffer, region, &info,
                                  i, &buffer_image);
      if (result != VK_SUCCESS)
         return handled;

      /* Blit-copy the requested image extent.
       *
       * Since we are copying, the blit must use the same format on the
       * destination and source images to avoid format conversions. The
       * only exception is copying stencil, which we upload to a R8UI source
       * image, but that we need to blit to a S8D24 destination (the only
       * stencil format we support).
       */
      blit_region =
         blit_region_for_image_to_buffer(&region->imageOffset,
                                         &region->imageExtent,
                                         mip_level, base_layer, layer_offset,
                                         &info);

      handled = blit_shader(cmd_buffer,
                            v3dv_image_from_handle(buffer_image),
                            info.dst_format,
                            image, info.src_format,
                            info.cmask, &info.cswizzle,
                            &blit_region, VK_FILTER_NEAREST, false);
      if (!handled) {
         /* This is unexpected, we should have a supported blit spec */
         unreachable("Unable to blit buffer to destination image");
         return false;
      }
   }

   assert(handled);
   return true;
}

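/* Forward declaration of the linear texel-buffer copy fallback defined later
 * in this file; used below to copy from linear images that the blit path
 * cannot handle.
 */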
static bool
copy_image_linear_texel_buffer(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_image *dst,
                               struct v3dv_image *src,
                               const VkImageCopy2 *region);

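/* Builds the VkImageCopy2 region used to copy one layer of an image to a
 * buffer through the linear texel-buffer path, expressing offsets and extents
 * in block units.
 */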
static VkImageCopy2
image_copy_region_for_image_to_buffer(const VkBufferImageCopy2 *region,
                                      struct image_to_buffer_info *info,
                                      uint32_t layer)
{
   VkImageCopy2 output = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_COPY_2,
      .srcSubresource = {
         .aspectMask = info->src_copy_aspect,
         .mipLevel = region->imageSubresource.mipLevel,
         .baseArrayLayer = region->imageSubresource.baseArrayLayer + layer,
         .layerCount = 1,
      },
      .srcOffset = {
            DIV_ROUND_UP(region->imageOffset.x, info->block_width),
            DIV_ROUND_UP(region->imageOffset.y, info->block_height),
            region->imageOffset.z,
      },
      .dstSubresource = {
         .aspectMask = info->dst_copy_aspect,
         .mipLevel = 0,
         .baseArrayLayer = 0,
         .layerCount = 1,
      },
      .dstOffset = { 0, 0, 0 },
      .extent = {
         DIV_ROUND_UP(region->imageExtent.width, info->block_width),
         DIV_ROUND_UP(region->imageExtent.height, info->block_height),
         1
      },
   };

   return output;
}

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_to_buffer_texel_buffer(struct v3dv_cmd_buffer *cmd_buffer,
                                  struct v3dv_buffer *dst_buffer,
                                  struct v3dv_image *src_image,
                                  const VkBufferImageCopy2 *region)
{
   bool handled = false;
   VkImage dst_buffer_image;
   struct image_to_buffer_info info;

   /* This is a requirement for copy_image_linear_texel_buffer below. We check
    * it in advance in order to do an early return.
    */
   if (src_image->tiled)
      return false;

   handled =
      gather_image_to_buffer_info(cmd_buffer, src_image, region,
                                  &info);
   if (!handled)
      return handled;

   /* At this point the implementation should support the copy; any possible
    * errors below are for different reasons, like an out-of-memory error.
    */
   handled = true;

   uint32_t num_layers;
   if (src_image->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

   VkResult result;
   VkImageCopy2 image_region;
   for (uint32_t layer = 0; layer < num_layers; layer++) {
      /* Create the destination image from the destination buffer */
      result =
         create_image_from_buffer(cmd_buffer, dst_buffer, region, &info,
                                  layer, &dst_buffer_image);
      if (result != VK_SUCCESS)
         return handled;

      image_region =
         image_copy_region_for_image_to_buffer(region, &info, layer);

      handled =
         copy_image_linear_texel_buffer(cmd_buffer,
                                        v3dv_image_from_handle(dst_buffer_image),
                                        src_image, &image_region);
   }

   return handled;
}

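/* Entry point for vkCmdCopyImageToBuffer2: tries the TLB path first, then
 * the shader blit path, and finally the linear texel-buffer path.
 */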
VKAPI_ATTR void VKAPI_CALL
v3dv_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
                           const VkCopyImageToBufferInfo2 *info)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_image, image, info->srcImage);
   V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->dstBuffer);

   assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);

   cmd_buffer->state.is_transfer = true;

   for (uint32_t i = 0; i < info->regionCount; i++) {
      const VkBufferImageCopy2 *region = &info->pRegions[i];

      if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, region))
         continue;

      if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, region))
         continue;

      if (copy_image_to_buffer_texel_buffer(cmd_buffer, buffer, image, region))
         continue;

      unreachable("Unsupported image to buffer copy.");
   }
   cmd_buffer->state.is_transfer = false;
}

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
               struct v3dv_image *dst,
               struct v3dv_image *src,
               const VkImageCopy2 *region)
{
   if (V3D_DBG(DISABLE_TFU)) {
      perf_debug("Copy images: TFU disabled, fallbacks could be slower.\n");
      return false;
   }

   /* Destination can't be raster format */
   if (!dst->tiled)
      return false;

   /* We can only do full copies, so if the format is D24S8 both aspects need
    * to be copied. We only need to check the dst format because the spec
    * states that depth/stencil formats must match exactly.
    */
   if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
       const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
                                             VK_IMAGE_ASPECT_STENCIL_BIT;
       if (region->dstSubresource.aspectMask != ds_aspects)
          return false;
   }

   /* Don't handle copies between uncompressed and compressed formats for now.
    *
    * FIXME: we should be able to handle these easily but there is no coverage
    * in CTS at the moment that makes such copies with full images (which we
    * require here), only partial copies. Also, in that case the code below that
    * checks for "dst image complete" requires some changes, since it is
    * checking against the region dimensions, which are in units of the source
    * image format.
    */
   if (vk_format_is_compressed(dst->vk.format) !=
       vk_format_is_compressed(src->vk.format)) {
      return false;
   }

   /* Source region must start at (0,0) */
   if (region->srcOffset.x != 0 || region->srcOffset.y != 0)
      return false;

   /* Destination image must be complete */
   if (region->dstOffset.x != 0 || region->dstOffset.y != 0)
      return false;

   uint8_t src_plane =
      v3dv_plane_from_aspect(region->srcSubresource.aspectMask);
   uint8_t dst_plane =
      v3dv_plane_from_aspect(region->dstSubresource.aspectMask);

   const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
   uint32_t dst_width = u_minify(dst->planes[dst_plane].width, dst_mip_level);
   uint32_t dst_height = u_minify(dst->planes[dst_plane].height, dst_mip_level);
   if (region->extent.width != dst_width || region->extent.height != dst_height)
      return false;

   /* From vkCmdCopyImage:
    *
    *   "When copying between compressed and uncompressed formats the extent
    *    members represent the texel dimensions of the source image and not
    *    the destination."
    */
   const uint32_t block_w =
      vk_format_get_blockwidth(src->planes[src_plane].vk_format);
   const uint32_t block_h =
      vk_format_get_blockheight(src->planes[src_plane].vk_format);
   uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
   uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);

   /* Account for sample count */
   assert(dst->vk.samples == src->vk.samples);
   if (dst->vk.samples > VK_SAMPLE_COUNT_1_BIT) {
      assert(dst->vk.samples == VK_SAMPLE_COUNT_4_BIT);
      width *= 2;
      height *= 2;
   }

   /* The TFU unit doesn't handle format conversions so we need the formats to
    * match. On the other hand, vkCmdCopyImage allows different color formats
    * on the source and destination images, but only if they are texel
    * compatible. For us, this means that we can effectively ignore different
    * formats and just make the copy using either of them, since we are just
    * moving raw data and not making any conversions.
    *
    * Also, the formats supported by the TFU unit are limited, but again, since
    * we are only doing raw copies here without interpreting or converting
    * the underlying pixel data according to its format, we can always choose
    * to use compatible formats that are supported with the TFU unit.
    */
   assert(dst->planes[dst_plane].cpp == src->planes[src_plane].cpp);
   const struct v3dv_format *format =
      v3dv_get_compatible_tfu_format(cmd_buffer->device,
                                     dst->planes[dst_plane].cpp, NULL);

   /* Emit a TFU job for each layer to blit */
   const uint32_t layer_count = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->dstSubresource.layerCount :
      region->extent.depth;
   const uint32_t src_mip_level = region->srcSubresource.mipLevel;

   const uint32_t base_src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->srcSubresource.baseArrayLayer : region->srcOffset.z;
   const uint32_t base_dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->dstSubresource.baseArrayLayer : region->dstOffset.z;
   for (uint32_t i = 0; i < layer_count; i++) {
      const uint32_t dst_offset =
         dst->planes[dst_plane].mem->bo->offset +
         v3dv_layer_offset(dst, dst_mip_level, base_dst_layer + i, dst_plane);
      const uint32_t src_offset =
         src->planes[src_plane].mem->bo->offset +
         v3dv_layer_offset(src, src_mip_level, base_src_layer + i, src_plane);

      const struct v3d_resource_slice *dst_slice =
         &dst->planes[dst_plane].slices[dst_mip_level];
      const struct v3d_resource_slice *src_slice =
         &src->planes[src_plane].slices[src_mip_level];

      v3dv_X(cmd_buffer->device, meta_emit_tfu_job)(
         cmd_buffer,
         dst->planes[dst_plane].mem->bo->handle,
         dst_offset,
         dst_slice->tiling,
         dst_slice->padded_height,
         dst->planes[dst_plane].cpp,
         src->planes[src_plane].mem->bo->handle,
         src_offset,
         src_slice->tiling,
         src_slice->tiling == V3D_TILING_RASTER ?
                              src_slice->stride : src_slice->padded_height,
         src->planes[src_plane].cpp,
         /* All compatible TFU formats are single-plane */
         width, height, &format->planes[0]);
   }

   return true;
}

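/* Non-static wrapper so other parts of the driver can attempt a TFU image
 * copy before falling back to slower copy implementations.
 */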
1257 inline bool
v3dv_cmd_buffer_copy_image_tfu(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_image * dst,struct v3dv_image * src,const VkImageCopy2 * region)1258 v3dv_cmd_buffer_copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
1259                                struct v3dv_image *dst,
1260                                struct v3dv_image *src,
1261                                const VkImageCopy2 *region)
1262 {
1263    return copy_image_tfu(cmd_buffer, dst, src, region);
1264 }
1265 
1266 /**
1267  * Returns true if the implementation supports the requested operation (even if
1268  * it failed to process it, for example, due to an out-of-memory error).
1269  */
1270 static bool
copy_image_tlb(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_image * dst,struct v3dv_image * src,const VkImageCopy2 * region)1271 copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
1272                struct v3dv_image *dst,
1273                struct v3dv_image *src,
1274                const VkImageCopy2 *region)
1275 {
1276    uint8_t src_plane =
1277       v3dv_plane_from_aspect(region->srcSubresource.aspectMask);
1278    assert(src_plane < src->plane_count);
1279    uint8_t dst_plane =
1280       v3dv_plane_from_aspect(region->dstSubresource.aspectMask);
1281    assert(dst_plane < dst->plane_count);
1282 
1283    VkFormat fb_format;
1284    if (!v3dv_meta_can_use_tlb(src, src_plane, region->srcSubresource.mipLevel,
1285                               &region->srcOffset, NULL, &fb_format) ||
1286        !v3dv_meta_can_use_tlb(dst, dst_plane, region->dstSubresource.mipLevel,
1287                               &region->dstOffset, &region->extent, &fb_format)) {
1288       return false;
1289    }
1290 
1291    /* From the Vulkan spec, VkImageCopy valid usage:
1292     *
1293     *    "If neither the calling command’s srcImage nor the calling command’s
1294     *     dstImage has a multi-planar image format then the aspectMask member
1295     *     of srcSubresource and dstSubresource must match."
1296     */
1297    assert(src->plane_count != 1 || dst->plane_count != 1 ||
1298           region->dstSubresource.aspectMask ==
1299           region->srcSubresource.aspectMask);
1300    uint32_t internal_type, internal_bpp;
1301    v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
1302       (fb_format, region->dstSubresource.aspectMask,
1303        &internal_type, &internal_bpp);
1304 
1305    /* From the Vulkan spec with VK_KHR_maintenance1, VkImageCopy valid usage:
1306     *
1307     * "The number of slices of the extent (for 3D) or layers of the
1308     *  srcSubresource (for non-3D) must match the number of slices of the
1309     *  extent (for 3D) or layers of the dstSubresource (for non-3D)."
1310     */
1311    assert((src->vk.image_type != VK_IMAGE_TYPE_3D ?
1312            region->srcSubresource.layerCount : region->extent.depth) ==
1313           (dst->vk.image_type != VK_IMAGE_TYPE_3D ?
1314            region->dstSubresource.layerCount : region->extent.depth));
1315    uint32_t num_layers;
1316    if (dst->vk.image_type != VK_IMAGE_TYPE_3D)
1317       num_layers = region->dstSubresource.layerCount;
1318    else
1319       num_layers = region->extent.depth;
1320    assert(num_layers > 0);
1321 
1322    struct v3dv_job *job =
1323       v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
1324    if (!job)
1325       return true;
1326 
1327    /* Handle copy to compressed image using compatible format */
1328    const uint32_t block_w =
1329       vk_format_get_blockwidth(dst->planes[dst_plane].vk_format);
1330    const uint32_t block_h =
1331       vk_format_get_blockheight(dst->planes[dst_plane].vk_format);
1332    const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
1333    const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
1334 
1335    v3dv_job_start_frame(job, width, height, num_layers, false, true, 1,
1336                         internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
1337                         src->vk.samples > VK_SAMPLE_COUNT_1_BIT);
1338 
1339    struct v3dv_meta_framebuffer framebuffer;
1340    v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
1341                                               internal_type, &job->frame_tiling);
1342 
1343    v3dv_X(job->device, job_emit_binning_flush)(job);
1344    v3dv_X(job->device, meta_emit_copy_image_rcl)(job, dst, src, &framebuffer, region);
1345 
1346    v3dv_cmd_buffer_finish_job(cmd_buffer);
1347 
1348    return true;
1349 }
1350 
1351 /**
1352  * Takes the image provided as argument and creates a new image that has
1353  * the same specification and aliases the same memory storage, except that:
1354  *
1355  *   - It has the uncompressed format passed in.
1356  *   - Its original width/height are scaled by the factors passed in.
1357  *
1358  * This is useful to implement copies from compressed images using the blit
1359  * path. The idea is that we create uncompressed "image views" of both the
1360  * source and destination images using the uncompressed format and then we
1361  * define the copy blit in terms of that format.
1362  */
1363 static struct v3dv_image *
1364 create_image_alias(struct v3dv_cmd_buffer *cmd_buffer,
1365                    struct v3dv_image *src,
1366                    float width_scale,
1367                    float height_scale,
1368                    VkFormat format)
1369 {
1370    assert(!vk_format_is_compressed(format));
1371    /* We don't support ycbcr compressed formats */
1372    assert(src->plane_count == 1);
1373 
1374    VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);
1375 
1376    VkImageCreateInfo info = {
1377       .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
1378       .imageType = src->vk.image_type,
1379       .format = format,
1380       .extent = {
1381          .width = src->vk.extent.width * width_scale,
1382          .height = src->vk.extent.height * height_scale,
1383          .depth = src->vk.extent.depth,
1384       },
1385       .mipLevels = src->vk.mip_levels,
1386       .arrayLayers = src->vk.array_layers,
1387       .samples = src->vk.samples,
1388       .tiling = src->tiled ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR,
1389       .usage = src->vk.usage,
1390    };
1391 
1392    VkImage _image;
1393    VkResult result =
1394       v3dv_CreateImage(_device, &info, &cmd_buffer->device->vk.alloc, &_image);
1395    if (result != VK_SUCCESS) {
1396       v3dv_flag_oom(cmd_buffer, NULL);
1397       return NULL;
1398    }
1399 
1400    struct v3dv_image *image = v3dv_image_from_handle(_image);
1401    image->planes[0].mem = src->planes[0].mem;
1402    image->planes[0].mem_offset = src->planes[0].mem_offset;
1403    return image;
1404 }
1405 
1406 /**
1407  * Returns true if the implementation supports the requested operation (even if
1408  * it failed to process it, for example, due to an out-of-memory error).
1409  */
1410 static bool
1411 copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
1412                 struct v3dv_image *dst,
1413                 struct v3dv_image *src,
1414                 const VkImageCopy2 *region)
1415 {
1416    if (!src->tiled && src->vk.image_type != VK_IMAGE_TYPE_1D)
1417       return false;
1418 
1419    uint8_t src_plane =
1420       v3dv_plane_from_aspect(region->srcSubresource.aspectMask);
1421    assert(src_plane < src->plane_count);
1422    uint8_t dst_plane =
1423       v3dv_plane_from_aspect(region->dstSubresource.aspectMask);
1424    assert(dst_plane < dst->plane_count);
1425 
1426    const uint32_t src_block_w =
1427       vk_format_get_blockwidth(src->planes[src_plane].vk_format);
1428    const uint32_t src_block_h =
1429       vk_format_get_blockheight(src->planes[src_plane].vk_format);
1430    const uint32_t dst_block_w =
1431       vk_format_get_blockwidth(dst->planes[dst_plane].vk_format);
1432    const uint32_t dst_block_h =
1433       vk_format_get_blockheight(dst->planes[dst_plane].vk_format);
1434    const float block_scale_w = (float)src_block_w / (float)dst_block_w;
1435    const float block_scale_h = (float)src_block_h / (float)dst_block_h;
1436 
1437    /* We need to choose a single format for the blit to ensure that this is
1438     * really a copy and there are no format conversions going on. Since we
1439     * are going to blit, we need to make sure that the selected format can be
1440     * both rendered to and textured from.
1441     */
1442    VkFormat format;
1443    float src_scale_w = 1.0f;
1444    float src_scale_h = 1.0f;
1445    float dst_scale_w = block_scale_w;
1446    float dst_scale_h = block_scale_h;
1447    if (vk_format_is_compressed(src->vk.format)) {
1448       /* If we are copying from a compressed format we should be aware that we
1449        * are going to texture from the source image, and the texture setup
1450        * knows the actual size of the image, so we need to choose a format
1451        * that has a per-texel (not per-block) bpp that is compatible for that
1452        * image size. For example, for a source image with size Bw*WxBh*H
1453        * and format ETC2_RGBA8_UNORM copied to a WxH image of format RGBA32UI,
1454        * each of the Bw*WxBh*H texels in the compressed source image is 8-bit
1455        * (which translates to a 128-bit 4x4 RGBA32 block when uncompressed),
1456        * so we could specify a blit with size Bw*WxBh*H and a format with
1457        * a bpp of 8-bit per texel (R8_UINT).
1458        *
1459        * Unfortunately, when copying from a format like ETC2_RGB8A1_UNORM,
1460        * which is 64 bits per block (4 bits per texel), we would need a
1461        * 4-bit format, which we don't have, so instead we still choose an
1462        * 8-bit format and apply a divisor to the row dimensions of the
1463        * blit, since we are copying two texels per item.
1464        *
1465        * Generally, we can choose any format so long as we compute appropriate
1466        * divisors for the width and height depending on the source image's
1467        * bpp.
1468        */
1469       assert(src->planes[src_plane].cpp == dst->planes[dst_plane].cpp);
1470 
1471       format = VK_FORMAT_R32G32_UINT;
1472       switch (src->planes[src_plane].cpp) {
1473       case 16:
1474          format = VK_FORMAT_R32G32B32A32_UINT;
1475          break;
1476       case 8:
1477          format = VK_FORMAT_R16G16B16A16_UINT;
1478          break;
1479       default:
1480          unreachable("Unsupported compressed format");
1481       }
1482 
1483       /* Create image views of the src/dst images that we can interpret in
1484        * terms of the canonical format.
1485        */
1486       src_scale_w /= src_block_w;
1487       src_scale_h /= src_block_h;
1488       dst_scale_w /= src_block_w;
1489       dst_scale_h /= src_block_h;
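      /* Worked example (illustrative): copying from ETC2_RGBA8_UNORM
       * (4x4 blocks, 16 bytes per block) to an uncompressed 16-byte-per-texel
       * image yields format = R32G32B32A32_UINT, src scales of 1/4 and dst
       * scales of 1, so the source alias is W/4 x H/4 and each of its texels
       * covers one 128-bit compressed block.
       */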
1490 
1491       src = create_image_alias(cmd_buffer, src,
1492                                src_scale_w, src_scale_h, format);
1493 
1494       dst = create_image_alias(cmd_buffer, dst,
1495                                dst_scale_w, dst_scale_h, format);
1496    } else {
1497       format = src->format->planes[src_plane].rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO ?
1498          src->planes[src_plane].vk_format :
1499          get_compatible_tlb_format(src->planes[src_plane].vk_format);
1500       if (format == VK_FORMAT_UNDEFINED)
1501          return false;
1502 
1503       const struct v3dv_format *f = v3dv_X(cmd_buffer->device, get_format)(format);
1504       assert(f->plane_count < 2);
1505       if (!f->plane_count || f->planes[0].tex_type == TEXTURE_DATA_FORMAT_NO)
1506          return false;
1507    }
1508 
1509    /* Given an uncompressed image with size WxH, if we copy it to a compressed
1510     * image, it will result in an image with size W*bWxH*bH, where bW and bH
1511     * are the compressed format's block width and height. This means that
1512     * copies between compressed and uncompressed images involve different
1513     * image sizes, and therefore, we need to take that into account when
1514     * setting up the source and destination blit regions below, so they are
1515     * consistent from the point of view of the single compatible format
1516     * selected for the copy.
1517     *
1518     * We should take into account that the dimensions of the region provided
1519     * to the copy command are specified in terms of the source image. With that
1520     * in mind, below we adjust the blit destination region to be consistent with
1521     * the source region for the compatible format, so basically, we apply
1522     * the block scale factor to the destination offset provided by the copy
1523     * command (because it is specified in terms of the destination image, not
1524     * the source), and then we just add the region copy dimensions to that
1525     * (since the region dimensions are already specified in terms of the source
1526     * image).
1527     */
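   /* With the ETC2 example above, a srcOffset of (8, 4) maps to (2, 1) in the
    * aliased source, while dstOffset keeps its coordinates because dst_scale
    * ends up being 1 for a compressed-to-uncompressed copy (illustrative).
    */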
1528    uint32_t region_width = region->extent.width * src_scale_w;
1529    uint32_t region_height = region->extent.height * src_scale_h;
1530    if (src_block_w > 1)
1531       region_width = util_next_power_of_two(region_width);
1532    if (src_block_h > 1)
1533       region_height = util_next_power_of_two(region_height);
1534 
1535    const VkOffset3D src_start = {
1536       region->srcOffset.x * src_scale_w,
1537       region->srcOffset.y * src_scale_h,
1538       region->srcOffset.z,
1539    };
1540    const VkOffset3D src_end = {
1541       src_start.x + region_width,
1542       src_start.y + region_height,
1543       src_start.z + region->extent.depth,
1544    };
1545 
1546    const VkOffset3D dst_start = {
1547       region->dstOffset.x * dst_scale_w,
1548       region->dstOffset.y * dst_scale_h,
1549       region->dstOffset.z,
1550    };
1551    const VkOffset3D dst_end = {
1552       dst_start.x + region_width,
1553       dst_start.y + region_height,
1554       dst_start.z + region->extent.depth,
1555    };
1556 
1557    const VkImageBlit2 blit_region = {
1558       .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
1559       .srcSubresource = region->srcSubresource,
1560       .srcOffsets = { src_start, src_end },
1561       .dstSubresource = region->dstSubresource,
1562       .dstOffsets = { dst_start, dst_end },
1563    };
1564    bool handled = blit_shader(cmd_buffer,
1565                               dst, format,
1566                               src, format,
1567                               0, NULL,
1568                               &blit_region, VK_FILTER_NEAREST, true);
1569 
1570    /* We should have selected formats that we can blit */
1571    assert(handled);
1572    return handled;
1573 }
1574 
1575 static bool
1576 copy_image_linear_texel_buffer(struct v3dv_cmd_buffer *cmd_buffer,
1577                                struct v3dv_image *dst,
1578                                struct v3dv_image *src,
1579                                const VkImageCopy2 *region)
1580 {
1581    if (src->tiled)
1582       return false;
1583 
1584    /* Implementations are allowed to restrict linear images like this */
1585    assert(region->srcOffset.z == 0);
1586    assert(region->dstOffset.z == 0);
1587    assert(region->srcSubresource.mipLevel == 0);
1588    assert(region->srcSubresource.baseArrayLayer == 0);
1589    assert(region->srcSubresource.layerCount == 1);
1590    assert(region->dstSubresource.mipLevel == 0);
1591    assert(region->dstSubresource.baseArrayLayer == 0);
1592    assert(region->dstSubresource.layerCount == 1);
1593 
1594    uint8_t src_plane =
1595       v3dv_plane_from_aspect(region->srcSubresource.aspectMask);
1596    uint8_t dst_plane =
1597       v3dv_plane_from_aspect(region->dstSubresource.aspectMask);
1598 
1599    assert(src->planes[src_plane].cpp == dst->planes[dst_plane].cpp);
1600    const uint32_t bpp = src->planes[src_plane].cpp;
1601 
1602    VkFormat format;
1603    switch (bpp) {
1604    case 16:
1605       format = VK_FORMAT_R32G32B32A32_UINT;
1606       break;
1607    case 8:
1608       format = VK_FORMAT_R16G16B16A16_UINT;
1609       break;
1610    case 4:
1611       format = VK_FORMAT_R8G8B8A8_UINT;
1612       break;
1613    case 2:
1614       format = VK_FORMAT_R16_UINT;
1615       break;
1616    case 1:
1617       format = VK_FORMAT_R8_UINT;
1618       break;
1619    default:
1620       unreachable("unsupported bit-size");
1621       return false;
1622    }
1623 
1624    VkComponentMapping ident_swizzle = {
1625       .r = VK_COMPONENT_SWIZZLE_IDENTITY,
1626       .g = VK_COMPONENT_SWIZZLE_IDENTITY,
1627       .b = VK_COMPONENT_SWIZZLE_IDENTITY,
1628       .a = VK_COMPONENT_SWIZZLE_IDENTITY,
1629    };
1630 
1631    const uint32_t buf_stride = src->planes[src_plane].slices[0].stride;
1632    const VkDeviceSize buf_offset =
1633       region->srcOffset.y * buf_stride + region->srcOffset.x * bpp;
1634 
1635    struct v3dv_buffer src_buffer;
1636    vk_object_base_init(&cmd_buffer->device->vk, &src_buffer.base,
1637                        VK_OBJECT_TYPE_BUFFER);
1638 
1639    const struct VkBufferCreateInfo buf_create_info = {
1640       .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
1641       .size = src->planes[src_plane].size,
1642       .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
1643       .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
1644    };
1645    v3dv_buffer_init(cmd_buffer->device, &buf_create_info, &src_buffer,
1646                     src->planes[src_plane].alignment);
1647 
1648    const VkBindBufferMemoryInfo buf_bind_info = {
1649       .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
1650       .buffer = v3dv_buffer_to_handle(&src_buffer),
1651       .memory = v3dv_device_memory_to_handle(src->planes[src_plane].mem),
1652       .memoryOffset = src->planes[src_plane].mem_offset +
1653          v3dv_layer_offset(src, 0, 0, src_plane),
1654    };
1655    v3dv_buffer_bind_memory(&buf_bind_info);
1656 
1657    const VkBufferImageCopy2 copy_region = {
1658       .sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2,
1659       .pNext = NULL,
1660       .bufferOffset = buf_offset,
1661       .bufferRowLength = buf_stride / bpp,
1662       .bufferImageHeight = src->vk.extent.height,
1663       .imageSubresource = region->dstSubresource,
1664       .imageOffset = region->dstOffset,
1665       .imageExtent = region->extent,
1666    };
1667 
1668    return texel_buffer_shader_copy(cmd_buffer,
1669                                    region->dstSubresource.aspectMask,
1670                                    dst,
1671                                    format,
1672                                    format,
1673                                    &src_buffer,
1674                                    src->planes[src_plane].cpp,
1675                                    0 /* color mask: full */, &ident_swizzle,
1676                                    1, &copy_region);
1677 }
1678 
1679 VKAPI_ATTR void VKAPI_CALL
1680 v3dv_CmdCopyImage2(VkCommandBuffer commandBuffer,
1681                       const VkCopyImageInfo2 *info)
1682 
1683 {
1684    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1685    V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
1686    V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);
1687 
1688    assert(src->vk.samples == dst->vk.samples);
1689 
1690    cmd_buffer->state.is_transfer = true;
1691 
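   /* Each region is handled by the first path that can take it: TFU first,
    * then TLB, then a blit shader, and finally a texel-buffer shader copy
    * for linear images.
    */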
1692    for (uint32_t i = 0; i < info->regionCount; i++) {
1693       const VkImageCopy2 *region = &info->pRegions[i];
1694       if (copy_image_tfu(cmd_buffer, dst, src, region))
1695          continue;
1696       if (copy_image_tlb(cmd_buffer, dst, src, region))
1697          continue;
1698       if (copy_image_blit(cmd_buffer, dst, src, region))
1699          continue;
1700       if (copy_image_linear_texel_buffer(cmd_buffer, dst, src, region))
1701          continue;
1702       unreachable("Image copy not supported");
1703    }
1704 
1705    cmd_buffer->state.is_transfer = false;
1706 }
1707 
1708 VKAPI_ATTR void VKAPI_CALL
1709 v3dv_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
1710                        const VkCopyBufferInfo2 *pCopyBufferInfo)
1711 {
1712    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1713    V3DV_FROM_HANDLE(v3dv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
1714    V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
1715 
1716    cmd_buffer->state.is_transfer = true;
1717 
1718    for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) {
1719       v3dv_X(cmd_buffer->device, meta_copy_buffer)
1720          (cmd_buffer,
1721           dst_buffer->mem->bo, dst_buffer->mem_offset,
1722           src_buffer->mem->bo, src_buffer->mem_offset,
1723           &pCopyBufferInfo->pRegions[i]);
1724    }
1725 
1726    cmd_buffer->state.is_transfer = false;
1727 }
1728 
1729 static void
1730 destroy_update_buffer_cb(VkDevice _device,
1731                          uint64_t pobj,
1732                          VkAllocationCallbacks *alloc)
1733 {
1734    V3DV_FROM_HANDLE(v3dv_device, device, _device);
1735    struct v3dv_bo *bo = (struct v3dv_bo *)((uintptr_t) pobj);
1736    v3dv_bo_free(device, bo);
1737 }
1738 
1739 VKAPI_ATTR void VKAPI_CALL
1740 v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
1741                      VkBuffer dstBuffer,
1742                      VkDeviceSize dstOffset,
1743                      VkDeviceSize dataSize,
1744                      const void *pData)
1745 {
1746    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1747    V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
1748 
1749    struct v3dv_bo *src_bo =
1750       v3dv_bo_alloc(cmd_buffer->device, dataSize, "vkCmdUpdateBuffer", true);
1751    if (!src_bo) {
1752       fprintf(stderr, "Failed to allocate BO for vkCmdUpdateBuffer.\n");
1753       return;
1754    }
1755 
1756    bool ok = v3dv_bo_map(cmd_buffer->device, src_bo, src_bo->size);
1757    if (!ok) {
1758       fprintf(stderr, "Failed to map BO for vkCmdUpdateBuffer.\n");
1759       return;
1760    }
1761 
1762    cmd_buffer->state.is_transfer = true;
1763 
1764    memcpy(src_bo->map, pData, dataSize);
1765 
1766    v3dv_bo_unmap(cmd_buffer->device, src_bo);
1767 
1768    VkBufferCopy2 region = {
1769       .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2,
1770       .srcOffset = 0,
1771       .dstOffset = dstOffset,
1772       .size = dataSize,
1773    };
1774    struct v3dv_job *copy_job =
1775       v3dv_X(cmd_buffer->device, meta_copy_buffer)
1776       (cmd_buffer, dst_buffer->mem->bo, dst_buffer->mem_offset,
1777        src_bo, 0, &region);
1778 
1779    if (copy_job) {
1780       v3dv_cmd_buffer_add_private_obj(
1781          cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);
1782    }
1783 
1784    cmd_buffer->state.is_transfer = false;
1785 }
1786 
1787 VKAPI_ATTR void VKAPI_CALL
1788 v3dv_CmdFillBuffer(VkCommandBuffer commandBuffer,
1789                    VkBuffer dstBuffer,
1790                    VkDeviceSize dstOffset,
1791                    VkDeviceSize size,
1792                    uint32_t data)
1793 {
1794    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1795    V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
1796 
1797    cmd_buffer->state.is_transfer = true;
1798 
1799    struct v3dv_bo *bo = dst_buffer->mem->bo;
1800 
1801    /* From the Vulkan spec:
1802     *
1803     *   "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not
1804     *    a multiple of 4, then the nearest smaller multiple is used."
1805     */
1806    if (size == VK_WHOLE_SIZE) {
1807       size = dst_buffer->size - dstOffset;
1808       size -= size % 4;
1809    }
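   /* e.g., a remaining size of 10 bytes is clamped down to 8 (illustrative). */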
1810 
1811    v3dv_X(cmd_buffer->device, meta_fill_buffer)
1812       (cmd_buffer, bo, dstOffset, size, data);
1813 
1814    cmd_buffer->state.is_transfer = false;
1815 }
1816 
1817 /**
1818  * Returns true if the implementation supports the requested operation (even if
1819  * it failed to process it, for example, due to an out-of-memory error).
1820  */
1821 static bool
1822 copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
1823                          struct v3dv_image *image,
1824                          struct v3dv_buffer *buffer,
1825                          const VkBufferImageCopy2 *region)
1826 {
1827    if (V3D_DBG(DISABLE_TFU)) {
1828       perf_debug("Copy buffer to image: TFU disabled, fallbacks could be slower.\n");
1829       return false;
1830    }
1831 
1832    assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
1833 
1834    /* Destination can't be raster format */
1835    if (!image->tiled)
1836       return false;
1837 
1838    /* We can't copy D24S8 because buffer to image copies only copy one aspect
1839     * at a time, and the TFU copies full images. Also, V3D stores the depth
1840     * bits for both D24S8 and D24X8 in the 24 MSBs of each 32-bit word, but
1841     * the Vulkan spec specifies the buffer data the other way around, so this
1842     * is not a straight copy: we would have to swizzle the channels, which the
1843     * TFU can't do.
1844     */
1845    if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
1846        image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
1847          return false;
1848    }
1849 
1850    /* Region must include full slice */
1851    const uint32_t offset_x = region->imageOffset.x;
1852    const uint32_t offset_y = region->imageOffset.y;
1853    if (offset_x != 0 || offset_y != 0)
1854       return false;
1855 
1856    uint32_t width, height;
1857    if (region->bufferRowLength == 0)
1858       width = region->imageExtent.width;
1859    else
1860       width = region->bufferRowLength;
1861 
1862    if (region->bufferImageHeight == 0)
1863       height = region->imageExtent.height;
1864    else
1865       height = region->bufferImageHeight;
1866 
1867    const uint8_t plane =
1868       v3dv_plane_from_aspect(region->imageSubresource.aspectMask);
1869 
1870    const uint32_t mip_level = region->imageSubresource.mipLevel;
1871    const struct v3d_resource_slice *slice = &image->planes[plane].slices[mip_level];
1872 
1873    if (width != slice->width || height != slice->height)
1874       return false;
1875 
1876    /* Handle region semantics for compressed images */
1877    const uint32_t block_w =
1878       vk_format_get_blockwidth(image->planes[plane].vk_format);
1879    const uint32_t block_h =
1880       vk_format_get_blockheight(image->planes[plane].vk_format);
1881    width = DIV_ROUND_UP(width, block_w);
1882    height = DIV_ROUND_UP(height, block_h);
1883 
1884    /* Format must be supported for texturing via the TFU. Since we are just
1885     * copying raw data and not converting between pixel formats, we can ignore
1886     * the image's format and choose a compatible TFU format for the image
1887     * texel size instead, which expands the list of formats we can handle here.
1888     */
1889    const struct v3dv_format *format =
1890       v3dv_get_compatible_tfu_format(cmd_buffer->device,
1891                                      image->planes[plane].cpp, NULL);
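   /* e.g., a 4-cpp RGBA8 image can use any 4-byte TFU format here, since the
    * copy just moves raw bytes without any format conversion (illustrative).
    */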
1892    /* We only use single-plane formats with the TFU */
1893    assert(format->plane_count == 1);
1894    const struct v3dv_format_plane *format_plane = &format->planes[0];
1895 
1896    uint32_t num_layers;
1897    if (image->vk.image_type != VK_IMAGE_TYPE_3D)
1898       num_layers = region->imageSubresource.layerCount;
1899    else
1900       num_layers = region->imageExtent.depth;
1901    assert(num_layers > 0);
1902 
1903    assert(image->planes[plane].mem && image->planes[plane].mem->bo);
1904    const struct v3dv_bo *dst_bo = image->planes[plane].mem->bo;
1905 
1906    assert(buffer->mem && buffer->mem->bo);
1907    const struct v3dv_bo *src_bo = buffer->mem->bo;
1908 
1909    /* Emit a TFU job per layer to copy */
1910    const uint32_t buffer_stride = width * image->planes[plane].cpp;
1911    for (int i = 0; i < num_layers; i++) {
1912       uint32_t layer;
1913       if (image->vk.image_type != VK_IMAGE_TYPE_3D)
1914          layer = region->imageSubresource.baseArrayLayer + i;
1915       else
1916          layer = region->imageOffset.z + i;
1917 
1918       const uint32_t buffer_offset =
1919          buffer->mem_offset + region->bufferOffset +
1920          height * buffer_stride * i;
1921       const uint32_t src_offset = src_bo->offset + buffer_offset;
1922 
1923       const uint32_t dst_offset =
1924          dst_bo->offset + v3dv_layer_offset(image, mip_level, layer, plane);
1925 
1926       v3dv_X(cmd_buffer->device, meta_emit_tfu_job)(
1927              cmd_buffer,
1928              dst_bo->handle,
1929              dst_offset,
1930              slice->tiling,
1931              slice->padded_height,
1932              image->planes[plane].cpp,
1933              src_bo->handle,
1934              src_offset,
1935              V3D_TILING_RASTER,
1936              width,
1937              1,
1938              width, height, format_plane);
1939    }
1940 
1941    return true;
1942 }
1943 
1944 /**
1945  * Returns true if the implementation supports the requested operation (even if
1946  * it failed to process it, for example, due to an out-of-memory error).
1947  */
1948 static bool
1949 copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
1950                          struct v3dv_image *image,
1951                          struct v3dv_buffer *buffer,
1952                          const VkBufferImageCopy2 *region)
1953 {
1954    VkFormat fb_format;
1955    uint8_t plane = v3dv_plane_from_aspect(region->imageSubresource.aspectMask);
1956    assert(plane < image->plane_count);
1957 
1958    if (!v3dv_meta_can_use_tlb(image, plane, region->imageSubresource.mipLevel,
1959                               &region->imageOffset, &region->imageExtent,
1960                               &fb_format)) {
1961       return false;
1962    }
1963 
1964    uint32_t internal_type, internal_bpp;
1965    v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
1966       (fb_format, region->imageSubresource.aspectMask,
1967        &internal_type, &internal_bpp);
1968 
1969    uint32_t num_layers;
1970    if (image->vk.image_type != VK_IMAGE_TYPE_3D)
1971       num_layers = region->imageSubresource.layerCount;
1972    else
1973       num_layers = region->imageExtent.depth;
1974    assert(num_layers > 0);
1975 
1976    struct v3dv_job *job =
1977       v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
1978    if (!job)
1979       return true;
1980 
1981    /* Handle copy to compressed format using a compatible format */
1982    const uint32_t block_w =
1983       vk_format_get_blockwidth(image->planes[plane].vk_format);
1984    const uint32_t block_h =
1985       vk_format_get_blockheight(image->planes[plane].vk_format);
1986    const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
1987    const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
1988 
1989    v3dv_job_start_frame(job, width, height, num_layers, false, true, 1,
1990                         internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
1991                         false);
1992 
1993    struct v3dv_meta_framebuffer framebuffer;
1994    v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
1995                                               internal_type, &job->frame_tiling);
1996 
1997    v3dv_X(job->device, job_emit_binning_flush)(job);
1998    v3dv_X(job->device, meta_emit_copy_buffer_to_image_rcl)
1999       (job, image, buffer, &framebuffer, region);
2000 
2001    v3dv_cmd_buffer_finish_job(cmd_buffer);
2002 
2003    return true;
2004 }
2005 
2006 static bool
2007 create_tiled_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer,
2008                                struct v3dv_image *image,
2009                                struct v3dv_buffer *buffer,
2010                                const VkBufferImageCopy2 *region)
2011 {
2012    if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, region))
2013       return true;
2014    if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, region))
2015       return true;
2016    return false;
2017 }
2018 
2019 static VkResult
2020 create_texel_buffer_copy_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
2021 {
2022    /* If this is not the first pool we create for this command buffer,
2023     * size it based on the size of the currently exhausted pool.
2024     */
2025    uint32_t descriptor_count = 64;
2026    if (cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE) {
2027       struct v3dv_descriptor_pool *exhausted_pool =
2028          v3dv_descriptor_pool_from_handle(cmd_buffer->meta.texel_buffer_copy.dspool);
2029       descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
2030    }
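   /* In practice the pool size roughly doubles each time a pool is exhausted:
    * 64, 128, 256, ... capped at 1024 sets (illustrative of the MIN2 above).
    */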
2031 
2032    /* Create the descriptor pool */
2033    cmd_buffer->meta.texel_buffer_copy.dspool = VK_NULL_HANDLE;
2034    VkDescriptorPoolSize pool_size = {
2035       .type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
2036       .descriptorCount = descriptor_count,
2037    };
2038    VkDescriptorPoolCreateInfo info = {
2039       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
2040       .maxSets = descriptor_count,
2041       .poolSizeCount = 1,
2042       .pPoolSizes = &pool_size,
2043       .flags = 0,
2044    };
2045    VkResult result =
2046       v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
2047                                 &info,
2048                                 &cmd_buffer->device->vk.alloc,
2049                                 &cmd_buffer->meta.texel_buffer_copy.dspool);
2050 
2051    if (result == VK_SUCCESS) {
2052       assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);
2053       const VkDescriptorPool _pool = cmd_buffer->meta.texel_buffer_copy.dspool;
2054 
2055       v3dv_cmd_buffer_add_private_obj(
2056          cmd_buffer, (uintptr_t) _pool,
2057          (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);
2058 
2059       struct v3dv_descriptor_pool *pool =
2060          v3dv_descriptor_pool_from_handle(_pool);
2061       pool->is_driver_internal = true;
2062    }
2063 
2064    return result;
2065 }
2066 
2067 static VkResult
2068 allocate_texel_buffer_copy_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
2069                                           VkDescriptorSet *set)
2070 {
2071    /* Make sure we have a descriptor pool */
2072    VkResult result;
2073    if (cmd_buffer->meta.texel_buffer_copy.dspool == VK_NULL_HANDLE) {
2074       result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
2075       if (result != VK_SUCCESS)
2076          return result;
2077    }
2078    assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);
2079 
2080    /* Allocate descriptor set */
2081    struct v3dv_device *device = cmd_buffer->device;
2082    VkDevice _device = v3dv_device_to_handle(device);
2083    VkDescriptorSetAllocateInfo info = {
2084       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
2085       .descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool,
2086       .descriptorSetCount = 1,
2087       .pSetLayouts = &device->meta.texel_buffer_copy.ds_layout,
2088    };
2089    result = v3dv_AllocateDescriptorSets(_device, &info, set);
2090 
2091    /* If we ran out of pool space, grow the pool and try again */
2092    if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
2093       result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
2094       if (result == VK_SUCCESS) {
2095          info.descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool;
2096          result = v3dv_AllocateDescriptorSets(_device, &info, set);
2097       }
2098    }
2099 
2100    return result;
2101 }
2102 
2103 static void
2104 get_texel_buffer_copy_pipeline_cache_key(VkFormat format,
2105                                          VkColorComponentFlags cmask,
2106                                          VkComponentMapping *cswizzle,
2107                                          bool is_layered,
2108                                          uint8_t *key)
2109 {
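   /* Key layout, matching the writes below: word 0 = format, word 1 = cmask,
    * word 2 = is_layered flag, words 3-6 = VkComponentMapping.
    */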
2110    memset(key, 0, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
2111 
2112    uint32_t *p = (uint32_t *) key;
2113 
2114    *p = format;
2115    p++;
2116 
2117    *p = cmask;
2118    p++;
2119 
2120    /* Note that we are using a single byte for this, so we could pack
2121     * more data into this 32-bit slot in the future.
2122     */
2123    *p = is_layered ? 1 : 0;
2124    p++;
2125 
2126    memcpy(p, cswizzle, sizeof(VkComponentMapping));
2127    p += sizeof(VkComponentMapping) / sizeof(uint32_t);
2128 
2129    assert(((uint8_t*)p - key) == V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
2130 }
2131 
2132 static bool
2133 create_blit_render_pass(struct v3dv_device *device,
2134                         VkFormat dst_format,
2135                         VkFormat src_format,
2136                         VkRenderPass *pass_load,
2137                         VkRenderPass *pass_no_load);
2138 
2139 static bool
2140 create_pipeline(struct v3dv_device *device,
2141                 struct v3dv_render_pass *pass,
2142                 struct nir_shader *vs_nir,
2143                 struct nir_shader *gs_nir,
2144                 struct nir_shader *fs_nir,
2145                 const VkPipelineVertexInputStateCreateInfo *vi_state,
2146                 const VkPipelineDepthStencilStateCreateInfo *ds_state,
2147                 const VkPipelineColorBlendStateCreateInfo *cb_state,
2148                 const VkPipelineMultisampleStateCreateInfo *ms_state,
2149                 const VkPipelineLayout layout,
2150                 VkPipeline *pipeline);
2151 
2152 static nir_shader *
2153 get_texel_buffer_copy_vs()
2154 {
2155    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
2156    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
2157                                                   "meta texel buffer copy vs");
2158    nir_variable *vs_out_pos =
2159       nir_variable_create(b.shader, nir_var_shader_out,
2160                           glsl_vec4_type(), "gl_Position");
2161    vs_out_pos->data.location = VARYING_SLOT_POS;
2162 
2163    nir_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
2164    nir_store_var(&b, vs_out_pos, pos, 0xf);
2165 
2166    return b.shader;
2167 }
2168 
2169 static nir_shader *
2170 get_texel_buffer_copy_gs()
2171 {
2172    /* FIXME: this creates a geometry shader that takes the index of a single
2173     * layer to copy from push constants, so we need to emit a draw call for
2174     * each layer that we want to copy. We could actually do better and have it
2175     * take a range of layers; however, if we were to do this, we would need to
2176     * be careful not to exceed the maximum number of output vertices allowed in
2177     * a geometry shader.
2178     */
2179    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
2180    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
2181                                                   "meta texel buffer copy gs");
2182    nir_shader *nir = b.shader;
2183    nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
2184    nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
2185                                (1ull << VARYING_SLOT_LAYER);
2186    nir->info.gs.input_primitive = MESA_PRIM_TRIANGLES;
2187    nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
2188    nir->info.gs.vertices_in = 3;
2189    nir->info.gs.vertices_out = 3;
2190    nir->info.gs.invocations = 1;
2191    nir->info.gs.active_stream_mask = 0x1;
2192 
2193    /* in vec4 gl_Position[3] */
2194    nir_variable *gs_in_pos =
2195       nir_variable_create(b.shader, nir_var_shader_in,
2196                           glsl_array_type(glsl_vec4_type(), 3, 0),
2197                           "in_gl_Position");
2198    gs_in_pos->data.location = VARYING_SLOT_POS;
2199 
2200    /* out vec4 gl_Position */
2201    nir_variable *gs_out_pos =
2202       nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
2203                           "out_gl_Position");
2204    gs_out_pos->data.location = VARYING_SLOT_POS;
2205 
2206    /* out float gl_Layer */
2207    nir_variable *gs_out_layer =
2208       nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
2209                           "out_gl_Layer");
2210    gs_out_layer->data.location = VARYING_SLOT_LAYER;
2211 
2212    /* Emit output triangle */
2213    for (uint32_t i = 0; i < 3; i++) {
2214       /* gl_Position from shader input */
2215       nir_deref_instr *in_pos_i =
2216          nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
2217       nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
2218 
2219       /* gl_Layer from push constants */
2220       nir_def *layer =
2221          nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
2222                                 .base = TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET,
2223                                 .range = 4);
2224       nir_store_var(&b, gs_out_layer, layer, 0x1);
2225 
2226       nir_emit_vertex(&b, 0);
2227    }
2228 
2229    nir_end_primitive(&b, 0);
2230 
2231    return nir;
2232 }
2233 
2234 static nir_def *
2235 load_frag_coord(nir_builder *b)
2236 {
2237    nir_foreach_shader_in_variable(var, b->shader) {
2238       if (var->data.location == VARYING_SLOT_POS)
2239          return nir_load_var(b, var);
2240    }
2241    nir_variable *pos = nir_variable_create(b->shader, nir_var_shader_in,
2242                                            glsl_vec4_type(), NULL);
2243    pos->data.location = VARYING_SLOT_POS;
2244    return nir_load_var(b, pos);
2245 }
2246 
2247 static uint32_t
2248 component_swizzle_to_nir_swizzle(VkComponentSwizzle comp, VkComponentSwizzle swz)
2249 {
2250    if (swz == VK_COMPONENT_SWIZZLE_IDENTITY)
2251       swz = comp;
2252 
2253    switch (swz) {
2254    case VK_COMPONENT_SWIZZLE_R:
2255       return 0;
2256    case VK_COMPONENT_SWIZZLE_G:
2257       return 1;
2258    case VK_COMPONENT_SWIZZLE_B:
2259       return 2;
2260    case VK_COMPONENT_SWIZZLE_A:
2261       return 3;
2262    default:
2263       unreachable("Invalid swizzle");
2264    };
2265 }
2266 
2267 static nir_shader *
2268 get_texel_buffer_copy_fs(struct v3dv_device *device, VkFormat format,
2269                          VkComponentMapping *cswizzle)
2270 {
2271    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
2272    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
2273                                                   "meta texel buffer copy fs");
2274 
2275    /* We only use the copy from texel buffer shader to implement
2276     * copy_buffer_to_image_shader, which always selects a compatible integer
2277     * format for the copy.
2278     */
2279    assert(vk_format_is_int(format));
2280 
2281    /* Fragment shader output color */
2282    nir_variable *fs_out_color =
2283       nir_variable_create(b.shader, nir_var_shader_out,
2284                           glsl_uvec4_type(), "out_color");
2285    fs_out_color->data.location = FRAG_RESULT_DATA0;
2286 
2287    /* Texel buffer input */
2288    const struct glsl_type *sampler_type =
2289       glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);
2290    nir_variable *sampler =
2291       nir_variable_create(b.shader, nir_var_uniform, sampler_type, "texel_buf");
2292    sampler->data.descriptor_set = 0;
2293    sampler->data.binding = 0;
2294 
2295    /* Load the box describing the pixel region we want to copy from the
2296     * texel buffer.
2297     */
2298    nir_def *box =
2299       nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0),
2300                              .base = TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET,
2301                              .range = 16);
2302 
2303    /* Load the buffer stride (this comes in texel units) */
2304    nir_def *stride =
2305       nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
2306                              .base = TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET,
2307                              .range = 4);
2308 
2309    /* Load the buffer offset (this comes in texel units) */
2310    nir_def *offset =
2311       nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
2312                              .base = TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET,
2313                              .range = 4);
2314 
2315    nir_def *coord = nir_f2i32(&b, load_frag_coord(&b));
2316 
2317    /* Load pixel data from texel buffer based on the x,y offset of the pixel
2318     * within the box. Texel buffers are 1D arrays of texels.
2319     *
2320     * Notice that we already make sure that we only generate fragments that are
2321     * inside the box through the scissor/viewport state, so our offset into the
2322     * texel buffer should always be within its bounds and we don't need
2323     * to add a check for that here.
2324     */
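   /* i.e., texel_offset = offset + (frag.x - box.x) + (frag.y - box.y) * stride */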
2325    nir_def *x_offset =
2326       nir_isub(&b, nir_channel(&b, coord, 0),
2327                    nir_channel(&b, box, 0));
2328    nir_def *y_offset =
2329       nir_isub(&b, nir_channel(&b, coord, 1),
2330                    nir_channel(&b, box, 1));
2331    nir_def *texel_offset =
2332       nir_iadd(&b, nir_iadd(&b, offset, x_offset),
2333                    nir_imul(&b, y_offset, stride));
2334 
2335    nir_def *tex_deref = &nir_build_deref_var(&b, sampler)->def;
2336    nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
2337    tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
2338    tex->op = nir_texop_txf;
2339    tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, texel_offset);
2340    tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_texture_deref, tex_deref);
2341    tex->dest_type = nir_type_uint32;
2342    tex->is_array = false;
2343    tex->coord_components = 1;
2344    nir_def_init(&tex->instr, &tex->def, 4, 32);
2345    nir_builder_instr_insert(&b, &tex->instr);
2346 
2347    uint32_t swiz[4];
2348    swiz[0] =
2349       component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_R, cswizzle->r);
2350    swiz[1] =
2351       component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_G, cswizzle->g);
2352    swiz[2] =
2353       component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_B, cswizzle->b);
2354    swiz[3] =
2355       component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_A, cswizzle->a);
2356    nir_def *s = nir_swizzle(&b, &tex->def, swiz, 4);
2357    nir_store_var(&b, fs_out_color, s, 0xf);
2358 
2359    return b.shader;
2360 }
2361 
2362 static bool
2363 create_texel_buffer_copy_pipeline(struct v3dv_device *device,
2364                                   VkFormat format,
2365                                   VkColorComponentFlags cmask,
2366                                   VkComponentMapping *cswizzle,
2367                                   bool is_layered,
2368                                   VkRenderPass _pass,
2369                                   VkPipelineLayout pipeline_layout,
2370                                   VkPipeline *pipeline)
2371 {
2372    struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);
2373 
2374    assert(vk_format_is_color(format));
2375 
2376    nir_shader *vs_nir = get_texel_buffer_copy_vs();
2377    nir_shader *fs_nir = get_texel_buffer_copy_fs(device, format, cswizzle);
2378    nir_shader *gs_nir = is_layered ? get_texel_buffer_copy_gs() : NULL;
2379 
2380    const VkPipelineVertexInputStateCreateInfo vi_state = {
2381       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
2382       .vertexBindingDescriptionCount = 0,
2383       .vertexAttributeDescriptionCount = 0,
2384    };
2385 
2386    VkPipelineDepthStencilStateCreateInfo ds_state = {
2387       .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
2388    };
2389 
2390    VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
2391    blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
2392       .blendEnable = false,
2393       .colorWriteMask = cmask,
2394    };
2395 
2396    const VkPipelineColorBlendStateCreateInfo cb_state = {
2397       .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
2398       .logicOpEnable = false,
2399       .attachmentCount = 1,
2400       .pAttachments = blend_att_state
2401    };
2402 
2403    const VkPipelineMultisampleStateCreateInfo ms_state = {
2404       .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
2405       .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
2406       .sampleShadingEnable = false,
2407       .pSampleMask = NULL,
2408       .alphaToCoverageEnable = false,
2409       .alphaToOneEnable = false,
2410    };
2411 
2412    return create_pipeline(device,
2413                           pass,
2414                           vs_nir, gs_nir, fs_nir,
2415                           &vi_state,
2416                           &ds_state,
2417                           &cb_state,
2418                           &ms_state,
2419                           pipeline_layout,
2420                           pipeline);
2421 }
2422 
2423 static bool
2424 get_copy_texel_buffer_pipeline(
2425    struct v3dv_device *device,
2426    VkFormat format,
2427    VkColorComponentFlags cmask,
2428    VkComponentMapping *cswizzle,
2429    VkImageType image_type,
2430    bool is_layered,
2431    struct v3dv_meta_texel_buffer_copy_pipeline **pipeline)
2432 {
2433    bool ok = true;
2434 
2435    uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
2436    get_texel_buffer_copy_pipeline_cache_key(format, cmask, cswizzle, is_layered,
2437                                             key);
2438 
2439    mtx_lock(&device->meta.mtx);
2440    struct hash_entry *entry =
2441       _mesa_hash_table_search(device->meta.texel_buffer_copy.cache[image_type],
2442                               key);
2443    if (entry) {
2444       mtx_unlock(&device->meta.mtx);
2445       *pipeline = entry->data;
2446       return true;
2447    }
2448 
2449    *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
2450                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2451 
2452    if (*pipeline == NULL)
2453       goto fail;
2454 
2455    /* The blit render pass is compatible */
2456    ok = create_blit_render_pass(device, format, format,
2457                                 &(*pipeline)->pass,
2458                                 &(*pipeline)->pass_no_load);
2459    if (!ok)
2460       goto fail;
2461 
2462    ok =
2463       create_texel_buffer_copy_pipeline(device,
2464                                         format, cmask, cswizzle, is_layered,
2465                                         (*pipeline)->pass,
2466                                         device->meta.texel_buffer_copy.p_layout,
2467                                         &(*pipeline)->pipeline);
2468    if (!ok)
2469       goto fail;
2470 
2471    uint8_t *dupkey = malloc(V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
2472    memcpy(dupkey, key, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
2473    _mesa_hash_table_insert(device->meta.texel_buffer_copy.cache[image_type],
2474                            dupkey, *pipeline);
2475 
2476    mtx_unlock(&device->meta.mtx);
2477    return true;
2478 
2479 fail:
2480    mtx_unlock(&device->meta.mtx);
2481 
2482    VkDevice _device = v3dv_device_to_handle(device);
2483    if (*pipeline) {
2484       if ((*pipeline)->pass)
2485          v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
2486       if ((*pipeline)->pipeline)
2487          v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
2488       vk_free(&device->vk.alloc, *pipeline);
2489       *pipeline = NULL;
2490    }
2491 
2492    return false;
2493 }
2494 
2495 static bool
2496 texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
2497                          VkImageAspectFlags aspect,
2498                          struct v3dv_image *image,
2499                          VkFormat dst_format,
2500                          VkFormat src_format,
2501                          struct v3dv_buffer *buffer,
2502                          uint32_t buffer_bpp,
2503                          VkColorComponentFlags cmask,
2504                          VkComponentMapping *cswizzle,
2505                          uint32_t region_count,
2506                          const VkBufferImageCopy2 *regions)
2507 {
2508    VkResult result;
2509    bool handled = false;
2510 
2511    assert(cswizzle);
2512 
2513    /* This is a copy path, so we don't handle format conversions. The only
2514     * exception is stencil to D24S8 copies, which are handled as a color
2515     * masked R8->RGBA8 copy.
2516     */
2517    assert(src_format == dst_format ||
2518           (dst_format == VK_FORMAT_R8G8B8A8_UINT &&
2519            src_format == VK_FORMAT_R8_UINT &&
2520            cmask == VK_COLOR_COMPONENT_R_BIT));
2521 
2522    /* We only handle color copies. Callers can copy D/S aspects by using
2523     * a compatible color format and maybe a cmask/cswizzle for D24 formats.
2524     */
2525    if (!vk_format_is_color(dst_format) || !vk_format_is_color(src_format))
2526       return handled;
2527 
2528    /* FIXME: we only handle uncompressed images for now. */
2529    if (vk_format_is_compressed(image->vk.format))
2530       return handled;
2531 
2532    const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
2533                                             VK_COLOR_COMPONENT_G_BIT |
2534                                             VK_COLOR_COMPONENT_B_BIT |
2535                                             VK_COLOR_COMPONENT_A_BIT;
2536    if (cmask == 0)
2537       cmask = full_cmask;
2538 
2539    /* The buffer needs to have VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT
2540     * so we can bind it as a texel buffer. Otherwise, the buffer view
2541     * we create below won't set up the texture state that we need for this.
2542     */
2543    if (!(buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT)) {
2544       if (v3dv_buffer_format_supports_features(
2545              cmd_buffer->device, src_format,
2546              VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT)) {
2547          buffer->usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
2548       } else {
2549          return handled;
2550       }
2551    }
2552 
2553    /* At this point we should be able to handle the copy unless an unexpected
2554     * error occurs, such as an OOM.
2555     */
2556    handled = true;
2557 
2558 
2559    /* Compute the number of layers to copy.
2560     *
2561     * If we are batching (region_count > 1) all our regions have the same
2562     * image subresource so we can take this from the first region. For 3D
2563     * images we require the same depth extent.
2564     */
2565    const VkImageSubresourceLayers *resource = &regions[0].imageSubresource;
2566    uint32_t num_layers;
2567    if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
2568       num_layers = resource->layerCount;
2569    } else {
2570       assert(region_count == 1);
2571       num_layers = regions[0].imageExtent.depth;
2572    }
2573    assert(num_layers > 0);
2574 
2575    /* Get the texel buffer copy pipeline */
2576    struct v3dv_meta_texel_buffer_copy_pipeline *pipeline = NULL;
2577    bool ok = get_copy_texel_buffer_pipeline(cmd_buffer->device,
2578                                             dst_format, cmask, cswizzle,
2579                                             image->vk.image_type, num_layers > 1,
2580                                             &pipeline);
2581    if (!ok)
2582       return handled;
2583    assert(pipeline && pipeline->pipeline && pipeline->pass);
2584 
2585    /* Setup descriptor set for the source texel buffer. We don't have to
2586     * register the descriptor as a private command buffer object since
2587     * all descriptors will be freed automatically with the descriptor
2588     * pool.
2589     */
2590    VkDescriptorSet set;
2591    result = allocate_texel_buffer_copy_descriptor_set(cmd_buffer, &set);
2592    if (result != VK_SUCCESS)
2593       return handled;
2594 
2595    /* We can't pass region->bufferOffset here for the offset field because
2596     * the texture base pointer in the texture shader state must be a 64-byte
2597     * aligned value. Instead, we use 0 here and we pass the offset in texels
2598     * as a push constant to the shader.
2599     */
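   /* For example, a bufferOffset of 256 bytes with 4-byte texels would be
    * passed to the shader as an offset of 64 texels (illustrative).
    */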
2600    VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);
2601    VkBufferViewCreateInfo buffer_view_info = {
2602       .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
2603       .buffer = v3dv_buffer_to_handle(buffer),
2604       .format = src_format,
2605       .offset = 0,
2606       .range = VK_WHOLE_SIZE,
2607    };
2608 
2609    VkBufferView texel_buffer_view;
2610    result = v3dv_CreateBufferView(_device, &buffer_view_info,
2611                                   &cmd_buffer->device->vk.alloc,
2612                                   &texel_buffer_view);
2613    if (result != VK_SUCCESS)
2614       return handled;
2615 
2616    v3dv_cmd_buffer_add_private_obj(
2617       cmd_buffer, (uintptr_t)texel_buffer_view,
2618       (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyBufferView);
2619 
2620    VkWriteDescriptorSet write = {
2621       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2622       .dstSet = set,
2623       .dstBinding = 0,
2624       .dstArrayElement = 0,
2625       .descriptorCount = 1,
2626       .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
2627       .pTexelBufferView = &texel_buffer_view,
2628    };
2629    v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);
2630 
2631    /* Push command buffer state before starting meta operation */
2632    v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
2633 
2634    /* Bind common state for all layers and regions  */
2635    VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
2636    v3dv_CmdBindPipeline(_cmd_buffer,
2637                         VK_PIPELINE_BIND_POINT_GRAPHICS,
2638                         pipeline->pipeline);
2639 
2640    v3dv_CmdBindDescriptorSets(_cmd_buffer,
2641                               VK_PIPELINE_BIND_POINT_GRAPHICS,
2642                               cmd_buffer->device->meta.texel_buffer_copy.p_layout,
2643                               0, 1, &set,
2644                               0, NULL);
2645 
2646    /* Setup framebuffer.
2647     *
2648     * For 3D images, this creates a layered framebuffer with a number of
2649     * layers matching the depth extent of the 3D image.
2650     */
2651    uint8_t plane = v3dv_plane_from_aspect(aspect);
2652    uint32_t fb_width = u_minify(image->planes[plane].width, resource->mipLevel);
2653    uint32_t fb_height = u_minify(image->planes[plane].height, resource->mipLevel);
2654 
2655    VkImageViewCreateInfo image_view_info = {
2656       .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
2657       .image = v3dv_image_to_handle(image),
2658       .viewType = v3dv_image_type_to_view_type(image->vk.image_type),
2659       .format = dst_format,
2660       .subresourceRange = {
2661          .aspectMask = aspect,
2662          .baseMipLevel = resource->mipLevel,
2663          .levelCount = 1,
2664          .baseArrayLayer = resource->baseArrayLayer,
2665          .layerCount = num_layers,
2666       },
2667    };
2668    VkImageView image_view;
2669    result = v3dv_create_image_view(cmd_buffer->device,
2670                                    &image_view_info, &image_view);
2671    if (result != VK_SUCCESS)
2672       goto fail;
2673 
2674    v3dv_cmd_buffer_add_private_obj(
2675       cmd_buffer, (uintptr_t)image_view,
2676       (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
2677 
2678    VkFramebufferCreateInfo fb_info = {
2679       .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
2680       .renderPass = pipeline->pass,
2681       .attachmentCount = 1,
2682       .pAttachments = &image_view,
2683       .width = fb_width,
2684       .height = fb_height,
2685       .layers = num_layers,
2686    };
2687 
2688    VkFramebuffer fb;
2689    result = v3dv_CreateFramebuffer(_device, &fb_info,
2690                                    &cmd_buffer->device->vk.alloc, &fb);
2691    if (result != VK_SUCCESS)
2692       goto fail;
2693 
2694    v3dv_cmd_buffer_add_private_obj(
2695       cmd_buffer, (uintptr_t)fb,
2696       (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
2697 
2698    /* For each layer */
2699    for (uint32_t l = 0; l < num_layers; l++) {
2700        /* Start render pass for this layer.
2701         *
2702         * If we only have one region to copy, then we might be able to
2703         * skip the TLB load if it is aligned to tile boundaries. All layers
2704         * copy the same area, so we only need to check this once.
2705         */
2706       bool can_skip_tlb_load = false;
2707       VkRect2D render_area;
2708       if (region_count == 1) {
2709          render_area.offset.x = regions[0].imageOffset.x;
2710          render_area.offset.y = regions[0].imageOffset.y;
2711          render_area.extent.width = regions[0].imageExtent.width;
2712          render_area.extent.height = regions[0].imageExtent.height;
2713 
2714          if (l == 0) {
2715             struct v3dv_render_pass *pipeline_pass =
2716                v3dv_render_pass_from_handle(pipeline->pass);
2717             can_skip_tlb_load =
2718                cmask == full_cmask &&
2719                v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,
2720                                                  v3dv_framebuffer_from_handle(fb),
2721                                                  pipeline_pass, 0);
2722          }
2723       } else {
2724          render_area.offset.x = 0;
2725          render_area.offset.y = 0;
2726          render_area.extent.width = fb_width;
2727          render_area.extent.height = fb_height;
2728       }
2729 
2730       VkRenderPassBeginInfo rp_info = {
2731          .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
2732          .renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
2733                                            pipeline->pass,
2734          .framebuffer = fb,
2735          .renderArea = render_area,
2736          .clearValueCount = 0,
2737       };
2738 
2739       VkSubpassBeginInfo sp_info = {
2740          .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
2741          .contents = VK_SUBPASS_CONTENTS_INLINE,
2742       };
2743 
2744       v3dv_CmdBeginRenderPass2(_cmd_buffer, &rp_info, &sp_info);
2745       struct v3dv_job *job = cmd_buffer->state.job;
2746       if (!job)
2747          goto fail;
2748 
2749       /* If we are using a layered copy we need to specify the layer for the
2750        * Geometry Shader.
2751        */
2752       if (num_layers > 1) {
2753          uint32_t layer = resource->baseArrayLayer + l;
2754          v3dv_CmdPushConstants(_cmd_buffer,
2755                                cmd_buffer->device->meta.texel_buffer_copy.p_layout,
2756                                VK_SHADER_STAGE_GEOMETRY_BIT,
2757                                24, 4, &layer);
2758       }
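      /* Note: byte offset 24 for the layer index comes right after the six
       * 32-bit words (24 bytes) of per-region data pushed to the fragment
       * stage below, so both pushes share the same push constant range
       * without overlapping.
       */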
2759 
2760       /* For each region */
2761       for (uint32_t r = 0; r < region_count; r++) {
2762          const VkBufferImageCopy2 *region = &regions[r];
2763 
2764          /* Obtain the 2D buffer region spec */
2765          uint32_t buf_width, buf_height;
2766          if (region->bufferRowLength == 0)
2767              buf_width = region->imageExtent.width;
2768          else
2769              buf_width = region->bufferRowLength;
2770 
2771          if (region->bufferImageHeight == 0)
2772              buf_height = region->imageExtent.height;
2773          else
2774              buf_height = region->bufferImageHeight;
2775 
2776          const VkViewport viewport = {
2777             .x = region->imageOffset.x,
2778             .y = region->imageOffset.y,
2779             .width = region->imageExtent.width,
2780             .height = region->imageExtent.height,
2781             .minDepth = 0.0f,
2782             .maxDepth = 1.0f
2783          };
2784          v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);
2785          const VkRect2D scissor = {
2786             .offset = { region->imageOffset.x, region->imageOffset.y },
2787             .extent = { region->imageExtent.width, region->imageExtent.height }
2788          };
2789          v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);
2790 
2791          const VkDeviceSize buf_offset =
2792             region->bufferOffset / buffer_bpp + l * buf_height * buf_width;
2793          uint32_t push_data[6] = {
2794             region->imageOffset.x,
2795             region->imageOffset.y,
2796             region->imageOffset.x + region->imageExtent.width - 1,
2797             region->imageOffset.y + region->imageExtent.height - 1,
2798             buf_width,
2799             buf_offset,
2800          };
2801 
2802          v3dv_CmdPushConstants(_cmd_buffer,
2803                                cmd_buffer->device->meta.texel_buffer_copy.p_layout,
2804                                VK_SHADER_STAGE_FRAGMENT_BIT,
2805                                0, sizeof(push_data), &push_data);
2806 
2807          v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);
2808       } /* For each region */
2809 
2810       VkSubpassEndInfo sp_end_info = {
2811          .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
2812       };
2813 
2814       v3dv_CmdEndRenderPass2(_cmd_buffer, &sp_end_info);
2815    } /* For each layer */
2816 
2817 fail:
2818    v3dv_cmd_buffer_meta_state_pop(cmd_buffer, true);
2819    return handled;
2820 }
2821 
2822 /**
2823  * Returns true if the implementation supports the requested operation (even if
2824  * it failed to process it, for example, due to an out-of-memory error).
2825  */
2826 static bool
2827 copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
2828                           VkImageAspectFlags aspect,
2829                           struct v3dv_image *image,
2830                           VkFormat dst_format,
2831                           VkFormat src_format,
2832                           struct v3dv_buffer *buffer,
2833                           uint32_t buffer_bpp,
2834                           VkColorComponentFlags cmask,
2835                           VkComponentMapping *cswizzle,
2836                           uint32_t region_count,
2837                           const VkBufferImageCopy2 *regions)
2838 {
2839    /* Since we can't sample linear images we need to upload the linear
2840     * buffer to a tiled image that we can use as a blit source, which
2841     * is slow.
2842     */
2843    perf_debug("Falling back to blit path for buffer to image copy.\n");
2844 
2845    struct v3dv_device *device = cmd_buffer->device;
2846    VkDevice _device = v3dv_device_to_handle(device);
2847    bool handled = true;
2848 
2849    /* Allocate memory for the tiled image. Since we copy layer by layer
2850     * we allocate memory to hold a full layer, which is the worst case.
2851     * For that we create a dummy image with that spec, get memory requirements
2852     * for it and use that information to create the memory allocation.
2853     * We will then reuse this memory store for all the regions we want to
2854     * copy.
2855     */
2856    VkImage dummy_image;
2857    VkImageCreateInfo dummy_info = {
2858       .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2859       .imageType = VK_IMAGE_TYPE_2D,
2860       .format = src_format,
2861       .extent = { image->vk.extent.width, image->vk.extent.height, 1 },
2862       .mipLevels = 1,
2863       .arrayLayers = 1,
2864       .samples = VK_SAMPLE_COUNT_1_BIT,
2865       .tiling = VK_IMAGE_TILING_OPTIMAL,
2866       .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
2867                VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2868       .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2869       .queueFamilyIndexCount = 0,
2870       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
2871    };
2872    VkResult result =
2873       v3dv_CreateImage(_device, &dummy_info, &device->vk.alloc, &dummy_image);
2874    if (result != VK_SUCCESS)
2875       return handled;
2876 
2877    VkMemoryRequirements reqs;
2878    vk_common_GetImageMemoryRequirements(_device, dummy_image, &reqs);
2879    v3dv_DestroyImage(_device, dummy_image, &device->vk.alloc);
2880 
2881    VkDeviceMemory mem;
2882    VkMemoryAllocateInfo alloc_info = {
2883       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
2884       .allocationSize = reqs.size,
2885       .memoryTypeIndex = 0,
2886    };
2887    result = v3dv_AllocateMemory(_device, &alloc_info, &device->vk.alloc, &mem);
2888    if (result != VK_SUCCESS)
2889       return handled;
2890 
2891    v3dv_cmd_buffer_add_private_obj(
2892       cmd_buffer, (uintptr_t)mem,
2893       (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_FreeMemory);
2894 
2895    /* Obtain the layer count.
2896     *
2897     * If we are batching (region_count > 1) all our regions have the same
2898     * image subresource so we can take this from the first region.
2899     */
2900    uint32_t num_layers;
2901    if (image->vk.image_type != VK_IMAGE_TYPE_3D)
2902       num_layers = regions[0].imageSubresource.layerCount;
2903    else
2904       num_layers = regions[0].imageExtent.depth;
2905    assert(num_layers > 0);
2906 
2907    /* Sanity check: we can only batch multiple regions together if they have
2908     * the same framebuffer (so the same layer).
2909     */
2910    assert(num_layers == 1 || region_count == 1);
2911 
2912    uint8_t plane = v3dv_plane_from_aspect(aspect);
2913    assert(plane < image->plane_count);
2914 
2915    const uint32_t block_width =
2916       vk_format_get_blockwidth(image->planes[plane].vk_format);
2917    const uint32_t block_height =
2918       vk_format_get_blockheight(image->planes[plane].vk_format);
2919 
2920    /* Copy regions by uploading each region to a temporary tiled image using
2921     * the memory we have just allocated as storage.
2922     */
2923    for (uint32_t r = 0; r < region_count; r++) {
2924       const VkBufferImageCopy2 *region = &regions[r];
2925 
2926       /* Obtain the 2D buffer region spec */
2927       uint32_t buf_width, buf_height;
2928       if (region->bufferRowLength == 0)
2929           buf_width = region->imageExtent.width;
2930       else
2931           buf_width = region->bufferRowLength;
2932 
2933       if (region->bufferImageHeight == 0)
2934           buf_height = region->imageExtent.height;
2935       else
2936           buf_height = region->bufferImageHeight;
2937 
2938       /* If the image is compressed, the bpp refers to blocks, not pixels */
2939       buf_width = buf_width / block_width;
2940       buf_height = buf_height / block_height;
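      /* E.g. for a compressed format with 4x4 blocks, a 64x32 texel buffer
       * footprint becomes a 16x8 block grid, which is the extent used for
       * the temporary tiled image below.
       */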
2941 
2942       for (uint32_t i = 0; i < num_layers; i++) {
2943          /* Create the tiled image */
2944          VkImageCreateInfo image_info = {
2945             .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2946             .imageType = VK_IMAGE_TYPE_2D,
2947             .format = src_format,
2948             .extent = { buf_width, buf_height, 1 },
2949             .mipLevels = 1,
2950             .arrayLayers = 1,
2951             .samples = VK_SAMPLE_COUNT_1_BIT,
2952             .tiling = VK_IMAGE_TILING_OPTIMAL,
2953             .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
2954                      VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2955             .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2956             .queueFamilyIndexCount = 0,
2957             .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
2958          };
2959 
2960          VkImage buffer_image;
2961          VkResult result =
2962             v3dv_CreateImage(_device, &image_info, &device->vk.alloc,
2963                              &buffer_image);
2964          if (result != VK_SUCCESS)
2965             return handled;
2966 
2967          v3dv_cmd_buffer_add_private_obj(
2968             cmd_buffer, (uintptr_t)buffer_image,
2969             (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
2970 
2971          result = vk_common_BindImageMemory(_device, buffer_image, mem, 0);
2972          if (result != VK_SUCCESS)
2973             return handled;
2974 
2975          /* When copying a multi-plane image the aspect indicates the plane to
2976           * copy. For these, we only copy one plane at a time, which is always
2977           * a color plane.
2978           */
2979          VkImageAspectFlags copy_aspect =
2980             image->plane_count == 1 ? aspect : VK_IMAGE_ASPECT_COLOR_BIT;
2981 
2982          /* Upload buffer contents for the selected layer */
2983          const VkDeviceSize buf_offset_bytes =
2984             region->bufferOffset + i * buf_height * buf_width * buffer_bpp;
2985          const VkBufferImageCopy2 buffer_image_copy = {
2986             .sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2,
2987             .bufferOffset = buf_offset_bytes,
2988             .bufferRowLength = region->bufferRowLength / block_width,
2989             .bufferImageHeight = region->bufferImageHeight / block_height,
2990             .imageSubresource = {
2991                .aspectMask = copy_aspect,
2992                .mipLevel = 0,
2993                .baseArrayLayer = 0,
2994                .layerCount = 1,
2995             },
2996             .imageOffset = { 0, 0, 0 },
2997             .imageExtent = { buf_width, buf_height, 1 }
2998          };
2999          handled =
3000             create_tiled_image_from_buffer(cmd_buffer,
3001                                            v3dv_image_from_handle(buffer_image),
3002                                            buffer, &buffer_image_copy);
3003          if (!handled) {
3004             /* This is unexpected, we should have set up the upload so it
3005              * conforms to a TFU or TLB copy.
3006              */
3007             unreachable("Unable to copy buffer to image through TLB");
3008             return false;
3009          }
3010 
3011          /* Blit-copy the requested image extent from the buffer image to the
3012           * destination image.
3013           *
3014           * Since we are copying, the blit must use the same format on the
3015           * destination and source images to avoid format conversions. The
3016           * only exception is copying stencil, which we upload to a R8UI source
3017           * image, but that we need to blit to a S8D24 destination (the only
3018           * stencil format we support).
3019           */
3020          const VkImageBlit2 blit_region = {
3021             .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
3022             .srcSubresource = {
3023                .aspectMask = copy_aspect,
3024                .mipLevel = 0,
3025                .baseArrayLayer = 0,
3026                .layerCount = 1,
3027             },
3028             .srcOffsets = {
3029                { 0, 0, 0 },
3030                { region->imageExtent.width, region->imageExtent.height, 1 },
3031             },
3032             .dstSubresource = {
3033                .aspectMask = aspect,
3034                .mipLevel = region->imageSubresource.mipLevel,
3035                .baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
3036                .layerCount = 1,
3037             },
3038             .dstOffsets = {
3039                {
3040                   DIV_ROUND_UP(region->imageOffset.x, block_width),
3041                   DIV_ROUND_UP(region->imageOffset.y, block_height),
3042                   region->imageOffset.z + i,
3043                },
3044                {
3045                   DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
3046                                block_width),
3047                   DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
3048                                block_height),
3049                   region->imageOffset.z + i + 1,
3050                },
3051             },
3052          };
3053 
3054          handled = blit_shader(cmd_buffer,
3055                                image, dst_format,
3056                                v3dv_image_from_handle(buffer_image), src_format,
3057                                cmask, cswizzle,
3058                                &blit_region, VK_FILTER_NEAREST, true);
3059          if (!handled) {
3060             /* This is unexpected, we should have a supported blit spec */
3061             unreachable("Unable to blit buffer to destination image");
3062             return false;
3063          }
3064       }
3065    }
3066 
3067    return handled;
3068 }
3069 
3070 /**
3071  * Returns true if the implementation supports the requested operation (even if
3072  * it failed to process it, for example, due to an out-of-memory error).
3073  */
3074 static bool
3075 copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer,
3076                             struct v3dv_image *image,
3077                             struct v3dv_buffer *buffer,
3078                             uint32_t region_count,
3079                             const VkBufferImageCopy2 *regions,
3080                             bool use_texel_buffer)
3081 {
3082    /* We can only call this with region_count > 1 if we can batch the regions
3083     * together, in which case they share the same image subresource, and so
3084     * the same aspect.
3085     */
3086    VkImageAspectFlags aspect = regions[0].imageSubresource.aspectMask;
3087    const VkImageAspectFlagBits any_plane_aspect =
3088       VK_IMAGE_ASPECT_PLANE_0_BIT |
3089       VK_IMAGE_ASPECT_PLANE_1_BIT |
3090       VK_IMAGE_ASPECT_PLANE_2_BIT;
3091 
3092    bool is_plane_aspect = aspect & any_plane_aspect;
3093 
3094    /* Generally, the bpp of the data in the buffer matches that of the
3095     * destination image. The exception is the case where we are uploading
3096     * stencil (8bpp) to a combined d24s8 image (32bpp).
3097     */
3098    uint8_t plane = v3dv_plane_from_aspect(aspect);
3099    assert(plane < image->plane_count);
3100    uint32_t buf_bpp = image->planes[plane].cpp;
3101 
3102    /* We are about to upload the buffer data to an image so we can then
3103     * blit that to our destination region. Because we are going to implement
3104     * the copy as a blit, we want our blit source and destination formats to be
3105     * the same (to avoid any format conversions), so we choose a canonical
3106     * format that matches the destination image bpp.
3107     */
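   /* For example, a 4 bpp color destination such as VK_FORMAT_R8G8B8A8_UNORM
    * is copied using VK_FORMAT_R8G8B8A8_UINT as both the blit source and
    * destination format (see the switch below), so texels are transferred
    * bit-for-bit with no conversion.
    */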
3108    VkComponentMapping ident_swizzle = {
3109       .r = VK_COMPONENT_SWIZZLE_IDENTITY,
3110       .g = VK_COMPONENT_SWIZZLE_IDENTITY,
3111       .b = VK_COMPONENT_SWIZZLE_IDENTITY,
3112       .a = VK_COMPONENT_SWIZZLE_IDENTITY,
3113    };
3114 
3115    VkComponentMapping cswizzle = ident_swizzle;
3116    VkColorComponentFlags cmask = 0; /* Write all components */
3117    VkFormat src_format;
3118    VkFormat dst_format;
3119    switch (buf_bpp) {
3120    case 16:
3121       assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
3122       src_format = VK_FORMAT_R32G32B32A32_UINT;
3123       dst_format = src_format;
3124       break;
3125    case 8:
3126       assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
3127       src_format = VK_FORMAT_R16G16B16A16_UINT;
3128       dst_format = src_format;
3129       break;
3130    case 4:
3131       switch (aspect) {
3132       case VK_IMAGE_ASPECT_COLOR_BIT:
3133       case VK_IMAGE_ASPECT_PLANE_0_BIT:
3134       case VK_IMAGE_ASPECT_PLANE_1_BIT:
3135       case VK_IMAGE_ASPECT_PLANE_2_BIT:
3136          src_format = VK_FORMAT_R8G8B8A8_UINT;
3137          dst_format = src_format;
3138          break;
3139       case VK_IMAGE_ASPECT_DEPTH_BIT:
3140          assert(image->vk.format == VK_FORMAT_D32_SFLOAT ||
3141                 image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
3142                 image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32);
3143          src_format = VK_FORMAT_R8G8B8A8_UINT;
3144          dst_format = src_format;
3145 
3146          /* For D24 formats, the Vulkan spec states that the depth component
3147           * in the buffer is stored in the 24 LSBs, but V3D wants it in the
3148           * 24 MSBs.
3149           */
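         /* Sketch of the fix-up (assuming a little-endian 32-bit texel):
          * an RGBA8 view of the buffer texel has R = d[7:0], G = d[15:8],
          * B = d[23:16]. The swizzle below writes G <- R, B <- G, A <- B
          * while masking out R, which lands the depth bits in the G/B/A
          * channels of the destination (its D24 component) and leaves the
          * stencil byte untouched.
          */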
3150          if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
3151              image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
3152             cmask = VK_COLOR_COMPONENT_G_BIT |
3153                     VK_COLOR_COMPONENT_B_BIT |
3154                     VK_COLOR_COMPONENT_A_BIT;
3155             cswizzle.r = VK_COMPONENT_SWIZZLE_R;
3156             cswizzle.g = VK_COMPONENT_SWIZZLE_R;
3157             cswizzle.b = VK_COMPONENT_SWIZZLE_G;
3158             cswizzle.a = VK_COMPONENT_SWIZZLE_B;
3159          }
3160          break;
3161       case VK_IMAGE_ASPECT_STENCIL_BIT:
3162          /* Since we don't support separate stencil this is always a stencil
3163           * copy to a combined depth/stencil image. Because we don't support
3164           * separate stencil images, we interpret the buffer data as a
3165           * color R8UI image, and implement the blit as a compatible color
3166           * blit to an RGBA8UI destination masking out writes to components
3167           * GBA (which map to the D24 component of a S8D24 image).
3168           */
3169          assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT);
3170          buf_bpp = 1;
3171          src_format = VK_FORMAT_R8_UINT;
3172          dst_format = VK_FORMAT_R8G8B8A8_UINT;
3173          cmask = VK_COLOR_COMPONENT_R_BIT;
3174          break;
3175       default:
3176          unreachable("unsupported aspect");
3177          return false;
3178       };
3179       break;
3180    case 2:
3181       assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT ||
3182              aspect == VK_IMAGE_ASPECT_DEPTH_BIT ||
3183              is_plane_aspect);
3184       src_format = VK_FORMAT_R16_UINT;
3185       dst_format = src_format;
3186       break;
3187    case 1:
3188       assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT || is_plane_aspect);
3189       src_format = VK_FORMAT_R8_UINT;
3190       dst_format = src_format;
3191       break;
3192    default:
3193       unreachable("unsupported bit-size");
3194       return false;
3195    }
3196 
3197    if (use_texel_buffer) {
3198       return texel_buffer_shader_copy(cmd_buffer, aspect, image,
3199                                       dst_format, src_format,
3200                                       buffer, buf_bpp,
3201                                       cmask, &cswizzle,
3202                                       region_count, regions);
3203    } else {
3204       return copy_buffer_to_image_blit(cmd_buffer, aspect, image,
3205                                        dst_format, src_format,
3206                                        buffer, buf_bpp,
3207                                        cmask, &cswizzle,
3208                                        region_count, regions);
3209    }
3210 }
3211 
3212 VKAPI_ATTR void VKAPI_CALL
3213 v3dv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
3214                               const VkCopyBufferToImageInfo2 *info)
3215 {
3216    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
3217    V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->srcBuffer);
3218    V3DV_FROM_HANDLE(v3dv_image, image, info->dstImage);
3219 
3220    assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
3221 
3222    cmd_buffer->state.is_transfer = true;
3223 
3224    uint32_t r = 0;
3225    while (r < info->regionCount) {
3226       /* The TFU and TLB paths can only copy one region at a time and the region
3227        * needs to start at the origin. We try these first for the common case
3228        * where we are copying full images, since they should be the fastest.
3229        */
3230       uint32_t batch_size = 1;
3231       if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, &info->pRegions[r]))
3232          goto handled;
3233 
3234       if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, &info->pRegions[r]))
3235          goto handled;
3236 
3237       /* Otherwise, we are copying subrects, so we fall back to copying
3238        * via shader and texel buffers and we try to batch the regions
3239        * if possible. We can only batch copies if they have the same
3240        * framebuffer spec, which is mostly determined by the image
3241        * subresource of the region.
3242        */
3243       const VkImageSubresourceLayers *rsc = &info->pRegions[r].imageSubresource;
3244       for (uint32_t s = r + 1; s < info->regionCount; s++) {
3245          const VkImageSubresourceLayers *rsc_s =
3246             &info->pRegions[s].imageSubresource;
3247 
3248          if (memcmp(rsc, rsc_s, sizeof(VkImageSubresourceLayers)) != 0)
3249             break;
3250 
3251          /* For 3D images we also need to check the depth extent */
3252          if (image->vk.image_type == VK_IMAGE_TYPE_3D &&
3253              info->pRegions[s].imageExtent.depth !=
3254              info->pRegions[r].imageExtent.depth) {
3255                break;
3256          }
3257 
3258          batch_size++;
3259       }
3260 
3261       if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
3262                                       batch_size, &info->pRegions[r], true)) {
3263          goto handled;
3264       }
3265 
3266       /* If we still could not copy, fall back to slower paths.
3267        *
3268        * FIXME: we could try to batch these too, but since they are bound to be
3269        * slow it might not be worth it and we should instead put more effort
3270        * in handling more cases with the other paths.
3271        */
3272       if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
3273                                       batch_size, &info->pRegions[r], false)) {
3274          goto handled;
3275       }
3276 
3277       unreachable("Unsupported buffer to image copy.");
3278 
3279 handled:
3280       r += batch_size;
3281    }
3282 
3283    cmd_buffer->state.is_transfer = false;
3284 }
3285 
3286 static void
3287 compute_blit_3d_layers(const VkOffset3D *offsets,
3288                        uint32_t *min_layer, uint32_t *max_layer,
3289                        bool *mirror_z);
3290 
3291 /**
3292  * Returns true if the implementation supports the requested operation (even if
3293  * it failed to process it, for example, due to an out-of-memory error).
3294  *
3295  * The TFU blit path doesn't handle scaling so the blit filter parameter can
3296  * be ignored.
3297  */
3298 static bool
3299 blit_tfu(struct v3dv_cmd_buffer *cmd_buffer,
3300          struct v3dv_image *dst,
3301          struct v3dv_image *src,
3302          const VkImageBlit2 *region)
3303 {
3304    if (V3D_DBG(DISABLE_TFU)) {
3305       perf_debug("Blit: TFU disabled, fallbacks could be slower.");
3306       return false;
3307    }
3308 
3309    assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT);
3310    assert(src->vk.samples == VK_SAMPLE_COUNT_1_BIT);
3311 
3312    /* From vkCmdBlitImage:
3313     *   "srcImage must not use a format that requires a sampler YCBCR
3314     *    conversion"
3315     *   "dstImage must not use a format that requires a sampler YCBCR
3316     *    conversion"
3317     */
3318    assert(dst->plane_count == 1);
3319    assert(src->plane_count == 1);
3320 
3321    /* Format must match */
3322    if (src->vk.format != dst->vk.format)
3323       return false;
3324 
3325    /* Destination can't be raster format */
3326    if (!dst->tiled)
3327       return false;
3328 
3329    /* Source region must start at (0,0) */
3330    if (region->srcOffsets[0].x != 0 || region->srcOffsets[0].y != 0)
3331       return false;
3332 
3333    /* Destination image must be complete */
3334    if (region->dstOffsets[0].x != 0 || region->dstOffsets[0].y != 0)
3335       return false;
3336 
3337    const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
3338    const uint32_t dst_width = u_minify(dst->vk.extent.width, dst_mip_level);
3339    const uint32_t dst_height = u_minify(dst->vk.extent.height, dst_mip_level);
3340    if (region->dstOffsets[1].x < dst_width - 1 ||
3341        region->dstOffsets[1].y < dst_height - 1) {
3342       return false;
3343    }
3344 
3345    /* No XY scaling */
3346    if (region->srcOffsets[1].x != region->dstOffsets[1].x ||
3347        region->srcOffsets[1].y != region->dstOffsets[1].y) {
3348       return false;
3349    }
3350 
3351    /* If the format is D24S8 both aspects need to be copied, since the TFU
3352     * can't be programmed to copy only one aspect of the image.
3353     */
3354    if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
3355        const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
3356                                              VK_IMAGE_ASPECT_STENCIL_BIT;
3357        if (region->dstSubresource.aspectMask != ds_aspects)
3358           return false;
3359    }
3360 
3361    /* Our TFU blits only handle exact copies (they require the same formats
3362     * on input and output, no scaling, etc.), so there are no pixel format
3363     * conversions and we can rewrite the format to use one that is TFU
3364     * compatible based on its texel size.
3365     */
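   /* E.g. a 4 cpp destination (RGBA8, R32_UINT, ...) can be programmed with
    * a 32 bpp TFU format; only the texel size matters since the data is
    * moved verbatim.
    */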
3366    const struct v3dv_format *format =
3367       v3dv_get_compatible_tfu_format(cmd_buffer->device,
3368                                      dst->planes[0].cpp, NULL);
3369 
3370    /* Emit a TFU job for each layer to blit */
3371    assert(region->dstSubresource.layerCount ==
3372           region->srcSubresource.layerCount);
3373 
3374    uint32_t min_dst_layer;
3375    uint32_t max_dst_layer;
3376    bool dst_mirror_z = false;
3377    if (dst->vk.image_type == VK_IMAGE_TYPE_3D) {
3378       compute_blit_3d_layers(region->dstOffsets,
3379                              &min_dst_layer, &max_dst_layer,
3380                              &dst_mirror_z);
3381    } else {
3382       min_dst_layer = region->dstSubresource.baseArrayLayer;
3383       max_dst_layer = min_dst_layer + region->dstSubresource.layerCount;
3384    }
3385 
3386    uint32_t min_src_layer;
3387    uint32_t max_src_layer;
3388    bool src_mirror_z = false;
3389    if (src->vk.image_type == VK_IMAGE_TYPE_3D) {
3390       compute_blit_3d_layers(region->srcOffsets,
3391                              &min_src_layer, &max_src_layer,
3392                              &src_mirror_z);
3393    } else {
3394       min_src_layer = region->srcSubresource.baseArrayLayer;
3395       max_src_layer = min_src_layer + region->srcSubresource.layerCount;
3396    }
3397 
3398    /* No Z scaling for 3D images (for non-3D images both src and dst must
3399     * have the same layerCount).
3400     */
3401    if (max_dst_layer - min_dst_layer != max_src_layer - min_src_layer)
3402       return false;
3403 
3404    const uint32_t layer_count = max_dst_layer - min_dst_layer;
3405    const uint32_t src_mip_level = region->srcSubresource.mipLevel;
3406    for (uint32_t i = 0; i < layer_count; i++) {
3407       /* Since the TFU path doesn't handle scaling, Z mirroring for 3D images
3408        * only involves reversing the order of the slices.
3409        */
3410       const uint32_t dst_layer =
3411          dst_mirror_z ? max_dst_layer - i - 1 : min_dst_layer + i;
3412       const uint32_t src_layer =
3413          src_mirror_z ? max_src_layer - i - 1 : min_src_layer + i;
3414 
3415       const uint32_t dst_offset =
3416          dst->planes[0].mem->bo->offset + v3dv_layer_offset(dst, dst_mip_level,
3417                                                             dst_layer, 0);
3418       const uint32_t src_offset =
3419          src->planes[0].mem->bo->offset + v3dv_layer_offset(src, src_mip_level,
3420                                                             src_layer, 0);
3421 
3422       const struct v3d_resource_slice *dst_slice = &dst->planes[0].slices[dst_mip_level];
3423       const struct v3d_resource_slice *src_slice = &src->planes[0].slices[src_mip_level];
3424 
3425       v3dv_X(cmd_buffer->device, meta_emit_tfu_job)(
3426          cmd_buffer,
3427          dst->planes[0].mem->bo->handle,
3428          dst_offset,
3429          dst_slice->tiling,
3430          dst_slice->padded_height,
3431          dst->planes[0].cpp,
3432          src->planes[0].mem->bo->handle,
3433          src_offset,
3434          src_slice->tiling,
3435          src_slice->tiling == V3D_TILING_RASTER ?
3436                               src_slice->stride : src_slice->padded_height,
3437          src->planes[0].cpp,
3438          dst_width, dst_height, &format->planes[0]);
3439    }
3440 
3441    return true;
3442 }
3443 
3444 static bool
3445 format_needs_software_int_clamp(VkFormat format)
3446 {
3447    switch (format) {
3448       case VK_FORMAT_A2R10G10B10_UINT_PACK32:
3449       case VK_FORMAT_A2R10G10B10_SINT_PACK32:
3450       case VK_FORMAT_A2B10G10R10_UINT_PACK32:
3451       case VK_FORMAT_A2B10G10R10_SINT_PACK32:
3452          return true;
3453       default:
3454          return false;
3455    };
3456 }
3457 
3458 static void
3459 get_blit_pipeline_cache_key(VkFormat dst_format,
3460                             VkFormat src_format,
3461                             VkColorComponentFlags cmask,
3462                             VkSampleCountFlagBits dst_samples,
3463                             VkSampleCountFlagBits src_samples,
3464                             uint8_t *key)
3465 {
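   /* Resulting 16-byte key layout, one 32-bit word each (written below):
    *   word 0: dst_format
    *   word 1: src_format, or 0 when no software int clamp is required
    *   word 2: cmask
    *   word 3: (dst_samples << 8) | src_samples
    */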
3466    memset(key, 0, V3DV_META_BLIT_CACHE_KEY_SIZE);
3467 
3468    uint32_t *p = (uint32_t *) key;
3469 
3470    *p = dst_format;
3471    p++;
3472 
3473    /* Generally, when blitting from a larger format to a smaller format
3474     * the hardware takes care of clamping the source to the RT range.
3475     * Specifically, for integer formats, this is done by using
3476     * V3D_RENDER_TARGET_CLAMP_INT in the render target setup, however, this
3477     * clamps to the bit-size of the render type, and some formats, such as
3478     * rgb10a2_uint, have a 16-bit type, so it won't do what we need and we
3479     * have to clamp in software. In these cases, we need to amend the blit
3480     * shader with clamp code that depends on both the src and dst formats, so
3481     * we need the src format to be part of the key.
3482     */
3483    *p = format_needs_software_int_clamp(dst_format) ? src_format : 0;
3484    p++;
3485 
3486    *p = cmask;
3487    p++;
3488 
3489    *p = (dst_samples << 8) | src_samples;
3490    p++;
3491 
3492    assert(((uint8_t*)p - key) == V3DV_META_BLIT_CACHE_KEY_SIZE);
3493 }
3494 
3495 static bool
3496 create_blit_render_pass(struct v3dv_device *device,
3497                         VkFormat dst_format,
3498                         VkFormat src_format,
3499                         VkRenderPass *pass_load,
3500                         VkRenderPass *pass_no_load)
3501 {
3502    const bool is_color_blit = vk_format_is_color(dst_format);
3503 
3504    /* Attachment load operation is specified below */
3505    VkAttachmentDescription2 att = {
3506       .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
3507       .format = dst_format,
3508       .samples = VK_SAMPLE_COUNT_1_BIT,
3509       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
3510       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
3511       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
3512    };
3513 
3514    VkAttachmentReference2 att_ref = {
3515       .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
3516       .attachment = 0,
3517       .layout = VK_IMAGE_LAYOUT_GENERAL,
3518    };
3519 
3520    VkSubpassDescription2 subpass = {
3521       .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
3522       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
3523       .inputAttachmentCount = 0,
3524       .colorAttachmentCount = is_color_blit ? 1 : 0,
3525       .pColorAttachments = is_color_blit ? &att_ref : NULL,
3526       .pResolveAttachments = NULL,
3527       .pDepthStencilAttachment = is_color_blit ? NULL : &att_ref,
3528       .preserveAttachmentCount = 0,
3529       .pPreserveAttachments = NULL,
3530    };
3531 
3532    VkRenderPassCreateInfo2 info = {
3533       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
3534       .attachmentCount = 1,
3535       .pAttachments = &att,
3536       .subpassCount = 1,
3537       .pSubpasses = &subpass,
3538       .dependencyCount = 0,
3539       .pDependencies = NULL,
3540    };
3541 
3542    VkResult result;
3543    att.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
3544    result = v3dv_CreateRenderPass2(v3dv_device_to_handle(device),
3545                                    &info, &device->vk.alloc, pass_load);
3546    if (result != VK_SUCCESS)
3547       return false;
3548 
3549    att.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
3550    result = v3dv_CreateRenderPass2(v3dv_device_to_handle(device),
3551                                    &info, &device->vk.alloc, pass_no_load);
3552    return result == VK_SUCCESS;
3553 }
3554 
3555 static nir_def *
3556 gen_tex_coords(nir_builder *b)
3557 {
3558    nir_def *tex_box =
3559       nir_load_push_constant(b, 4, 32, nir_imm_int(b, 0), .base = 0, .range = 16);
3560 
3561    nir_def *tex_z =
3562       nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
3563 
3564    nir_def *vertex_id = nir_load_vertex_id(b);
3565 
3566    /* vertex 0: src0_x, src0_y
3567     * vertex 1: src0_x, src1_y
3568     * vertex 2: src1_x, src0_y
3569     * vertex 3: src1_x, src1_y
3570     *
3571     * So:
3572     *
3573     * channel 0 is vertex_id < 2 ? src0_x : src1_x
3574     * channel 1 is vertex_id & 1 ? src1_y : src0_y
3575     */
3576 
3577    nir_def *one = nir_imm_int(b, 1);
3578    nir_def *c0cmp = nir_ilt_imm(b, vertex_id, 2);
3579    nir_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
3580 
3581    nir_def *comp[4];
3582    comp[0] = nir_bcsel(b, c0cmp,
3583                        nir_channel(b, tex_box, 0),
3584                        nir_channel(b, tex_box, 2));
3585 
3586    comp[1] = nir_bcsel(b, c1cmp,
3587                        nir_channel(b, tex_box, 3),
3588                        nir_channel(b, tex_box, 1));
3589    comp[2] = tex_z;
3590    comp[3] = nir_imm_float(b, 1.0f);
3591    return nir_vec(b, comp, 4);
3592 }
3593 
3594 static nir_def *
3595 build_nir_tex_op_read(struct nir_builder *b,
3596                       nir_def *tex_pos,
3597                       enum glsl_base_type tex_type,
3598                       enum glsl_sampler_dim dim)
3599 {
3600    assert(dim != GLSL_SAMPLER_DIM_MS);
3601 
3602    const struct glsl_type *sampler_type =
3603       glsl_sampler_type(dim, false, false, tex_type);
3604    nir_variable *sampler =
3605       nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
3606    sampler->data.descriptor_set = 0;
3607    sampler->data.binding = 0;
3608 
3609    nir_def *tex_deref = &nir_build_deref_var(b, sampler)->def;
3610    nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
3611    tex->sampler_dim = dim;
3612    tex->op = nir_texop_tex;
3613    tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, tex_pos);
3614    tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_texture_deref, tex_deref);
3615    tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_sampler_deref, tex_deref);
3616    tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);
3617    tex->is_array = glsl_sampler_type_is_array(sampler_type);
3618    tex->coord_components = tex_pos->num_components;
3619 
3620    nir_def_init(&tex->instr, &tex->def, 4, 32);
3621    nir_builder_instr_insert(b, &tex->instr);
3622    return &tex->def;
3623 }
3624 
3625 static nir_def *
3626 build_nir_tex_op_ms_fetch_sample(struct nir_builder *b,
3627                                  nir_variable *sampler,
3628                                  nir_def *tex_deref,
3629                                  enum glsl_base_type tex_type,
3630                                  nir_def *tex_pos,
3631                                  nir_def *sample_idx)
3632 {
3633    nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
3634    tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
3635    tex->op = nir_texop_txf_ms;
3636    tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, tex_pos);
3637    tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_texture_deref, tex_deref);
3638    tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_ms_index, sample_idx);
3639    tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);
3640    tex->is_array = false;
3641    tex->coord_components = tex_pos->num_components;
3642 
3643    nir_def_init(&tex->instr, &tex->def, 4, 32);
3644    nir_builder_instr_insert(b, &tex->instr);
3645    return &tex->def;
3646 }
3647 
3648 /* Fetches all samples at the given position and averages them */
3649 static nir_def *
3650 build_nir_tex_op_ms_resolve(struct nir_builder *b,
3651                             nir_def *tex_pos,
3652                             enum glsl_base_type tex_type,
3653                             VkSampleCountFlagBits src_samples)
3654 {
3655    assert(src_samples > VK_SAMPLE_COUNT_1_BIT);
3656    const struct glsl_type *sampler_type =
3657       glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
3658    nir_variable *sampler =
3659       nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
3660    sampler->data.descriptor_set = 0;
3661    sampler->data.binding = 0;
3662 
3663    const bool is_int = glsl_base_type_is_integer(tex_type);
3664 
3665    nir_def *tmp = NULL;
3666    nir_def *tex_deref = &nir_build_deref_var(b, sampler)->def;
3667    for (uint32_t i = 0; i < src_samples; i++) {
3668       nir_def *s =
3669          build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
3670                                           tex_type, tex_pos,
3671                                           nir_imm_int(b, i));
3672 
3673       /* For integer formats, the multisample resolve operation is expected to
3674        * return one of the samples, so we just return the first one.
3675        */
3676       if (is_int)
3677          return s;
3678 
3679       tmp = i == 0 ? s : nir_fadd(b, tmp, s);
3680    }
3681 
3682    assert(!is_int);
3683    return nir_fmul_imm(b, tmp, 1.0f / src_samples);
3684 }
3685 
3686 /* Fetches the current sample (gl_SampleID) at the given position */
3687 static nir_def *
3688 build_nir_tex_op_ms_read(struct nir_builder *b,
3689                          nir_def *tex_pos,
3690                          enum glsl_base_type tex_type)
3691 {
3692    const struct glsl_type *sampler_type =
3693       glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
3694    nir_variable *sampler =
3695       nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
3696    sampler->data.descriptor_set = 0;
3697    sampler->data.binding = 0;
3698 
3699    nir_def *tex_deref = &nir_build_deref_var(b, sampler)->def;
3700 
3701    return build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
3702                                            tex_type, tex_pos,
3703                                            nir_load_sample_id(b));
3704 }
3705 
3706 static nir_def *
build_nir_tex_op(struct nir_builder * b,struct v3dv_device * device,nir_def * tex_pos,enum glsl_base_type tex_type,VkSampleCountFlagBits dst_samples,VkSampleCountFlagBits src_samples,enum glsl_sampler_dim dim)3707 build_nir_tex_op(struct nir_builder *b,
3708                  struct v3dv_device *device,
3709                  nir_def *tex_pos,
3710                  enum glsl_base_type tex_type,
3711                  VkSampleCountFlagBits dst_samples,
3712                  VkSampleCountFlagBits src_samples,
3713                  enum glsl_sampler_dim dim)
3714 {
3715    switch (dim) {
3716    case GLSL_SAMPLER_DIM_MS:
3717       assert(src_samples == VK_SAMPLE_COUNT_4_BIT);
3718       /* For multisampled texture sources we need to use fetching instead of
3719        * normalized texture coordinates. We already configured our blit
3720        * coordinates to be in texel units, but here we still need to convert
3721        * them from floating point to integer.
3722        */
3723       tex_pos = nir_f2i32(b, tex_pos);
3724 
3725       if (dst_samples == VK_SAMPLE_COUNT_1_BIT)
3726          return build_nir_tex_op_ms_resolve(b, tex_pos, tex_type, src_samples);
3727       else
3728          return build_nir_tex_op_ms_read(b, tex_pos, tex_type);
3729    default:
3730       assert(src_samples == VK_SAMPLE_COUNT_1_BIT);
3731       return build_nir_tex_op_read(b, tex_pos, tex_type, dim);
3732    }
3733 }
3734 
3735 static nir_shader *
3736 get_blit_vs()
3737 {
3738    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
3739    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
3740                                                   "meta blit vs");
3741 
3742    const struct glsl_type *vec4 = glsl_vec4_type();
3743 
3744    nir_variable *vs_out_pos =
3745       nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
3746    vs_out_pos->data.location = VARYING_SLOT_POS;
3747 
3748    nir_variable *vs_out_tex_coord =
3749       nir_variable_create(b.shader, nir_var_shader_out, vec4, "out_tex_coord");
3750    vs_out_tex_coord->data.location = VARYING_SLOT_VAR0;
3751    vs_out_tex_coord->data.interpolation = INTERP_MODE_SMOOTH;
3752 
3753    nir_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
3754    nir_store_var(&b, vs_out_pos, pos, 0xf);
3755 
3756    nir_def *tex_coord = gen_tex_coords(&b);
3757    nir_store_var(&b, vs_out_tex_coord, tex_coord, 0xf);
3758 
3759    return b.shader;
3760 }
3761 
3762 static uint32_t
3763 get_channel_mask_for_sampler_dim(enum glsl_sampler_dim sampler_dim)
3764 {
3765    switch (sampler_dim) {
3766    case GLSL_SAMPLER_DIM_1D: return 0x1;
3767    case GLSL_SAMPLER_DIM_2D: return 0x3;
3768    case GLSL_SAMPLER_DIM_MS: return 0x3;
3769    case GLSL_SAMPLER_DIM_3D: return 0x7;
3770    default:
3771       unreachable("invalid sampler dim");
3772    };
3773 }
3774 
3775 static nir_shader *
3776 get_color_blit_fs(struct v3dv_device *device,
3777                   VkFormat dst_format,
3778                   VkFormat src_format,
3779                   VkSampleCountFlagBits dst_samples,
3780                   VkSampleCountFlagBits src_samples,
3781                   enum glsl_sampler_dim sampler_dim)
3782 {
3783    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
3784    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
3785                                                   "meta blit fs");
3786 
3787    const struct glsl_type *vec4 = glsl_vec4_type();
3788 
3789    nir_variable *fs_in_tex_coord =
3790       nir_variable_create(b.shader, nir_var_shader_in, vec4, "in_tex_coord");
3791    fs_in_tex_coord->data.location = VARYING_SLOT_VAR0;
3792 
3793    const struct glsl_type *fs_out_type =
3794       vk_format_is_sint(dst_format) ? glsl_ivec4_type() :
3795       vk_format_is_uint(dst_format) ? glsl_uvec4_type() :
3796                                       glsl_vec4_type();
3797 
3798    enum glsl_base_type src_base_type =
3799       vk_format_is_sint(src_format) ? GLSL_TYPE_INT :
3800       vk_format_is_uint(src_format) ? GLSL_TYPE_UINT :
3801                                       GLSL_TYPE_FLOAT;
3802 
3803    nir_variable *fs_out_color =
3804       nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
3805    fs_out_color->data.location = FRAG_RESULT_DATA0;
3806 
3807    nir_def *tex_coord = nir_load_var(&b, fs_in_tex_coord);
3808    const uint32_t channel_mask = get_channel_mask_for_sampler_dim(sampler_dim);
3809    tex_coord = nir_channels(&b, tex_coord, channel_mask);
3810 
3811    nir_def *color = build_nir_tex_op(&b, device, tex_coord, src_base_type,
3812                                          dst_samples, src_samples, sampler_dim);
3813 
3814    /* For integer textures, if the bit-size of the destination is too small to
3815     * hold the source value, Vulkan (CTS) expects the implementation to clamp to the
3816     * maximum value the destination can hold. The hardware can clamp to the
3817     * render target type, which usually matches the component bit-size, but
3818     * there are some cases that won't match, such as rgb10a2, which has a 16-bit
3819     * render target type, so in these cases we need to clamp manually.
3820     */
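   /* Worked example: blitting an R16_UINT source to a
    * VK_FORMAT_A2R10G10B10_UINT_PACK32 destination clamps each 10-bit
    * channel to (1 << 10) - 1 = 1023 and the 2-bit alpha to 3.
    */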
3821    if (format_needs_software_int_clamp(dst_format)) {
3822       assert(vk_format_is_int(dst_format));
3823       enum pipe_format src_pformat = vk_format_to_pipe_format(src_format);
3824       enum pipe_format dst_pformat = vk_format_to_pipe_format(dst_format);
3825 
3826       nir_def *c[4];
3827       for (uint32_t i = 0; i < 4; i++) {
3828          c[i] = nir_channel(&b, color, i);
3829 
3830          const uint32_t src_bit_size =
3831             util_format_get_component_bits(src_pformat,
3832                                            UTIL_FORMAT_COLORSPACE_RGB,
3833                                            i);
3834          const uint32_t dst_bit_size =
3835             util_format_get_component_bits(dst_pformat,
3836                                            UTIL_FORMAT_COLORSPACE_RGB,
3837                                            i);
3838 
3839          if (dst_bit_size >= src_bit_size)
3840             continue;
3841 
3842          assert(dst_bit_size > 0);
3843          if (util_format_is_pure_uint(dst_pformat)) {
3844             nir_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1);
3845             c[i] = nir_umin(&b, c[i], max);
3846          } else {
3847             nir_def *max = nir_imm_int(&b, (1 << (dst_bit_size - 1)) - 1);
3848             nir_def *min = nir_imm_int(&b, -(1 << (dst_bit_size - 1)));
3849             c[i] = nir_imax(&b, nir_imin(&b, c[i], max), min);
3850          }
3851       }
3852 
3853       color = nir_vec4(&b, c[0], c[1], c[2], c[3]);
3854    }
3855 
3856    nir_store_var(&b, fs_out_color, color, 0xf);
3857 
3858    return b.shader;
3859 }
3860 
3861 static bool
3862 create_pipeline(struct v3dv_device *device,
3863                 struct v3dv_render_pass *pass,
3864                 struct nir_shader *vs_nir,
3865                 struct nir_shader *gs_nir,
3866                 struct nir_shader *fs_nir,
3867                 const VkPipelineVertexInputStateCreateInfo *vi_state,
3868                 const VkPipelineDepthStencilStateCreateInfo *ds_state,
3869                 const VkPipelineColorBlendStateCreateInfo *cb_state,
3870                 const VkPipelineMultisampleStateCreateInfo *ms_state,
3871                 const VkPipelineLayout layout,
3872                 VkPipeline *pipeline)
3873 {
3874    struct vk_shader_module vs_m = vk_shader_module_from_nir(vs_nir);
3875    struct vk_shader_module fs_m = vk_shader_module_from_nir(fs_nir);
3876    struct vk_shader_module gs_m;
3877 
3878    uint32_t num_stages = gs_nir ? 3 : 2;
3879 
3880 
3881    VkPipelineShaderStageCreateInfo stages[3] = {
3882       {
3883          .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3884          .stage = VK_SHADER_STAGE_VERTEX_BIT,
3885          .module = vk_shader_module_to_handle(&vs_m),
3886          .pName = "main",
3887       },
3888       {
3889          .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3890          .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
3891          .module = vk_shader_module_to_handle(&fs_m),
3892          .pName = "main",
3893       },
3894       {
3895          .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3896          .stage = VK_SHADER_STAGE_GEOMETRY_BIT,
3897          .module = VK_NULL_HANDLE,
3898          .pName = "main",
3899       },
3900    };
3901 
3902    if (gs_nir) {
3903       gs_m = vk_shader_module_from_nir(gs_nir);
3904       stages[2].module = vk_shader_module_to_handle(&gs_m);
3905    }
3906 
3907    VkGraphicsPipelineCreateInfo info = {
3908       .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
3909 
3910       .stageCount = num_stages,
3911       .pStages = stages,
3912 
3913       .pVertexInputState = vi_state,
3914 
3915       .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
3916          .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
3917          .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
3918          .primitiveRestartEnable = false,
3919       },
3920 
3921       .pViewportState = &(VkPipelineViewportStateCreateInfo) {
3922          .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
3923          .viewportCount = 1,
3924          .scissorCount = 1,
3925       },
3926 
3927       .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
3928          .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
3929          .rasterizerDiscardEnable = false,
3930          .polygonMode = VK_POLYGON_MODE_FILL,
3931          .cullMode = VK_CULL_MODE_NONE,
3932          .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
3933          .depthBiasEnable = false,
3934       },
3935 
3936       .pMultisampleState = ms_state,
3937 
3938       .pDepthStencilState = ds_state,
3939 
3940       .pColorBlendState = cb_state,
3941 
3942       /* The meta blit/clear pipelines declare the state below as dynamic.
3943        * As a consequence, vkCmdBindPipeline writes no dynamic state to the
3944        * cmd buffer, so at the end of the meta operation we only need to
3945        * restore the dynamic state that was set with vkCmdSet*().
3946        */
3947       .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
3948          .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
3949          .dynamicStateCount = 6,
3950          .pDynamicStates = (VkDynamicState[]) {
3951             VK_DYNAMIC_STATE_VIEWPORT,
3952             VK_DYNAMIC_STATE_SCISSOR,
3953             VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
3954             VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
3955             VK_DYNAMIC_STATE_STENCIL_REFERENCE,
3956             VK_DYNAMIC_STATE_BLEND_CONSTANTS,
3957             VK_DYNAMIC_STATE_DEPTH_BIAS,
3958             VK_DYNAMIC_STATE_LINE_WIDTH,
3959          },
3960       },
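      /* Note: with dynamicStateCount = 6, only the first six entries above
       * are consumed, so DEPTH_BIAS and LINE_WIDTH are not actually declared
       * dynamic here.
       */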
3961 
3962       .flags = 0,
3963       .layout = layout,
3964       .renderPass = v3dv_render_pass_to_handle(pass),
3965       .subpass = 0,
3966    };
3967 
3968    VkResult result =
3969       v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
3970                                    VK_NULL_HANDLE,
3971                                    1, &info,
3972                                    &device->vk.alloc,
3973                                    pipeline);
3974 
3975    ralloc_free(vs_nir);
3976    ralloc_free(gs_nir);
3977    ralloc_free(fs_nir);
3978 
3979    return result == VK_SUCCESS;
3980 }
3981 
3982 static enum glsl_sampler_dim
3983 get_sampler_dim(VkImageType type, VkSampleCountFlagBits src_samples)
3984 {
3985    /* From the Vulkan 1.0 spec, VkImageCreateInfo Valid Usage:
3986     *
3987     *   "If samples is not VK_SAMPLE_COUNT_1_BIT, then imageType must be
3988     *    VK_IMAGE_TYPE_2D, ..."
3989     */
3990    assert(src_samples == VK_SAMPLE_COUNT_1_BIT || type == VK_IMAGE_TYPE_2D);
3991 
3992    switch (type) {
3993    case VK_IMAGE_TYPE_1D: return GLSL_SAMPLER_DIM_1D;
3994    case VK_IMAGE_TYPE_2D:
3995       return src_samples == VK_SAMPLE_COUNT_1_BIT ? GLSL_SAMPLER_DIM_2D :
3996                                                     GLSL_SAMPLER_DIM_MS;
3997    case VK_IMAGE_TYPE_3D: return GLSL_SAMPLER_DIM_3D;
3998    default:
3999       unreachable("Invalid image type");
4000    }
4001 }
4002 
4003 static bool
4004 create_blit_pipeline(struct v3dv_device *device,
4005                      VkFormat dst_format,
4006                      VkFormat src_format,
4007                      VkColorComponentFlags cmask,
4008                      VkImageType src_type,
4009                      VkSampleCountFlagBits dst_samples,
4010                      VkSampleCountFlagBits src_samples,
4011                      VkRenderPass _pass,
4012                      VkPipelineLayout pipeline_layout,
4013                      VkPipeline *pipeline)
4014 {
4015    struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);
4016 
4017    /* We always rewrite depth/stencil blits to compatible color blits */
4018    assert(vk_format_is_color(dst_format));
4019    assert(vk_format_is_color(src_format));
4020 
4021    const enum glsl_sampler_dim sampler_dim =
4022       get_sampler_dim(src_type, src_samples);
4023 
4024    nir_shader *vs_nir = get_blit_vs();
4025    nir_shader *fs_nir =
4026       get_color_blit_fs(device, dst_format, src_format,
4027                         dst_samples, src_samples, sampler_dim);
4028 
4029    const VkPipelineVertexInputStateCreateInfo vi_state = {
4030       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
4031       .vertexBindingDescriptionCount = 0,
4032       .vertexAttributeDescriptionCount = 0,
4033    };
4034 
4035    VkPipelineDepthStencilStateCreateInfo ds_state = {
4036       .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
4037    };
4038 
4039    VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
4040    blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
4041       .blendEnable = false,
4042       .colorWriteMask = cmask,
4043    };
4044 
4045    const VkPipelineColorBlendStateCreateInfo cb_state = {
4046       .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
4047       .logicOpEnable = false,
4048       .attachmentCount = 1,
4049       .pAttachments = blend_att_state
4050    };
4051 
4052    const VkPipelineMultisampleStateCreateInfo ms_state = {
4053       .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
4054       .rasterizationSamples = dst_samples,
4055       .sampleShadingEnable = dst_samples > VK_SAMPLE_COUNT_1_BIT,
4056       .pSampleMask = NULL,
4057       .alphaToCoverageEnable = false,
4058       .alphaToOneEnable = false,
4059    };
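   /* Sample shading is enabled for multisampled destinations, presumably so
    * that the fragment shader runs once per sample and each destination
    * sample gets its own source fetch instead of sharing a single value.
    */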
4060 
4061    return create_pipeline(device,
4062                           pass,
4063                           vs_nir, NULL, fs_nir,
4064                           &vi_state,
4065                           &ds_state,
4066                           &cb_state,
4067                           &ms_state,
4068                           pipeline_layout,
4069                           pipeline);
4070 }
4071 
4072 /**
4073  * Return a pipeline suitable for blitting the requested aspect given the
4074  * destination and source formats.
4075  */
4076 static bool
4077 get_blit_pipeline(struct v3dv_device *device,
4078                   VkFormat dst_format,
4079                   VkFormat src_format,
4080                   VkColorComponentFlags cmask,
4081                   VkImageType src_type,
4082                   VkSampleCountFlagBits dst_samples,
4083                   VkSampleCountFlagBits src_samples,
4084                   struct v3dv_meta_blit_pipeline **pipeline)
4085 {
4086    bool ok = true;
4087 
4088    uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
4089    get_blit_pipeline_cache_key(dst_format, src_format, cmask,
4090                                dst_samples, src_samples, key);
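   /* The blit cache is indexed by source image type rather than folding the
    * type into the key, presumably because the sampler dimensionality baked
    * into the fragment shader already depends on it (see get_sampler_dim()).
    */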
4091    mtx_lock(&device->meta.mtx);
4092    struct hash_entry *entry =
4093       _mesa_hash_table_search(device->meta.blit.cache[src_type], &key);
4094    if (entry) {
4095       mtx_unlock(&device->meta.mtx);
4096       *pipeline = entry->data;
4097       return true;
4098    }
4099 
4100    *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
4101                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
4102 
4103    if (*pipeline == NULL)
4104       goto fail;
4105 
4106    ok = create_blit_render_pass(device, dst_format, src_format,
4107                                 &(*pipeline)->pass,
4108                                 &(*pipeline)->pass_no_load);
4109    if (!ok)
4110       goto fail;
4111 
4112    /* Create the pipeline using one of the render passes, they are both
4113     * compatible, so we don't care which one we use here.
4114     */
4115    ok = create_blit_pipeline(device,
4116                              dst_format,
4117                              src_format,
4118                              cmask,
4119                              src_type,
4120                              dst_samples,
4121                              src_samples,
4122                              (*pipeline)->pass,
4123                              device->meta.blit.p_layout,
4124                              &(*pipeline)->pipeline);
4125    if (!ok)
4126       goto fail;
4127 
4128    memcpy((*pipeline)->key, key, sizeof((*pipeline)->key));
4129    _mesa_hash_table_insert(device->meta.blit.cache[src_type],
4130                            &(*pipeline)->key, *pipeline);
4131 
4132    mtx_unlock(&device->meta.mtx);
4133    return true;
4134 
4135 fail:
4136    mtx_unlock(&device->meta.mtx);
4137 
4138    VkDevice _device = v3dv_device_to_handle(device);
4139    if (*pipeline) {
4140       if ((*pipeline)->pass)
4141          v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
4142       if ((*pipeline)->pass_no_load)
4143          v3dv_DestroyRenderPass(_device, (*pipeline)->pass_no_load, &device->vk.alloc);
4144       if ((*pipeline)->pipeline)
4145          v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
4146       vk_free(&device->vk.alloc, *pipeline);
4147       *pipeline = NULL;
4148    }
4149 
4150    return false;
4151 }
4152 
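/* Computes the (x, y) origin and (w, h) size of a blit box from a pair of
 * VkOffset3D corners, clamped to the image dimensions, and reports whether
 * the blit mirrors along each axis. For example (hypothetical values), with
 * offsets[0].x = 64 and offsets[1].x = 32 on a 128-pixel-wide image this
 * yields mirror_x = true, x = 32 and w = 32.
 */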
4153 static void
4154 compute_blit_box(const VkOffset3D *offsets,
4155                  uint32_t image_w, uint32_t image_h,
4156                  uint32_t *x, uint32_t *y, uint32_t *w, uint32_t *h,
4157                  bool *mirror_x, bool *mirror_y)
4158 {
4159    if (offsets[1].x >= offsets[0].x) {
4160       *mirror_x = false;
4161       *x = MIN2(offsets[0].x, image_w - 1);
4162       *w = MIN2(offsets[1].x - offsets[0].x, image_w - offsets[0].x);
4163    } else {
4164       *mirror_x = true;
4165       *x = MIN2(offsets[1].x, image_w - 1);
4166       *w = MIN2(offsets[0].x - offsets[1].x, image_w - offsets[1].x);
4167    }
4168    if (offsets[1].y >= offsets[0].y) {
4169       *mirror_y = false;
4170       *y = MIN2(offsets[0].y, image_h - 1);
4171       *h = MIN2(offsets[1].y - offsets[0].y, image_h - offsets[0].y);
4172    } else {
4173       *mirror_y = true;
4174       *y = MIN2(offsets[1].y, image_h - 1);
4175       *h = MIN2(offsets[0].y - offsets[1].y, image_h - offsets[1].y);
4176    }
4177 }
4178 
4179 static void
4180 compute_blit_3d_layers(const VkOffset3D *offsets,
4181                        uint32_t *min_layer, uint32_t *max_layer,
4182                        bool *mirror_z)
4183 {
4184    if (offsets[1].z >= offsets[0].z) {
4185       *mirror_z = false;
4186       *min_layer = offsets[0].z;
4187       *max_layer = offsets[1].z;
4188    } else {
4189       *mirror_z = true;
4190       *min_layer = offsets[1].z;
4191       *max_layer = offsets[0].z;
4192    }
4193 }
4194 
4195 static VkResult
4196 create_blit_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
4197 {
4198    /* If this is not the first pool we create for this command buffer,
4199     * size it based on the size of the currently exhausted pool.
4200     */
4201    uint32_t descriptor_count = 64;
4202    if (cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE) {
4203       struct v3dv_descriptor_pool *exhausted_pool =
4204          v3dv_descriptor_pool_from_handle(cmd_buffer->meta.blit.dspool);
4205       descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
4206    }
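   /* Growth pattern (assuming max_entry_count reflects the previous pool's
    * maxSets): 64, 128, 256, ... descriptors per pool, capped at 1024.
    */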
4207 
4208    /* Create the descriptor pool */
4209    cmd_buffer->meta.blit.dspool = VK_NULL_HANDLE;
4210    VkDescriptorPoolSize pool_size = {
4211       .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
4212       .descriptorCount = descriptor_count,
4213    };
4214    VkDescriptorPoolCreateInfo info = {
4215       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
4216       .maxSets = descriptor_count,
4217       .poolSizeCount = 1,
4218       .pPoolSizes = &pool_size,
4219       .flags = 0,
4220    };
4221    VkResult result =
4222       v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
4223                                 &info,
4224                                 &cmd_buffer->device->vk.alloc,
4225                                 &cmd_buffer->meta.blit.dspool);
4226 
4227    if (result == VK_SUCCESS) {
4228       assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);
4229       const VkDescriptorPool _pool = cmd_buffer->meta.blit.dspool;
4230 
4231       v3dv_cmd_buffer_add_private_obj(
4232          cmd_buffer, (uintptr_t) _pool,
4233          (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);
4234 
4235       struct v3dv_descriptor_pool *pool =
4236          v3dv_descriptor_pool_from_handle(_pool);
4237       pool->is_driver_internal = true;
4238    }
4239 
4240    return result;
4241 }
4242 
4243 static VkResult
4244 allocate_blit_source_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
4245                                     VkDescriptorSet *set)
4246 {
4247    /* Make sure we have a descriptor pool */
4248    VkResult result;
4249    if (cmd_buffer->meta.blit.dspool == VK_NULL_HANDLE) {
4250       result = create_blit_descriptor_pool(cmd_buffer);
4251       if (result != VK_SUCCESS)
4252          return result;
4253    }
4254    assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);
4255 
4256    /* Allocate descriptor set */
4257    struct v3dv_device *device = cmd_buffer->device;
4258    VkDevice _device = v3dv_device_to_handle(device);
4259    VkDescriptorSetAllocateInfo info = {
4260       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
4261       .descriptorPool = cmd_buffer->meta.blit.dspool,
4262       .descriptorSetCount = 1,
4263       .pSetLayouts = &device->meta.blit.ds_layout,
4264    };
4265    result = v3dv_AllocateDescriptorSets(_device, &info, set);
4266 
4267    /* If we ran out of pool space, grow the pool and try again */
4268    if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
4269       result = create_blit_descriptor_pool(cmd_buffer);
4270       if (result == VK_SUCCESS) {
4271          info.descriptorPool = cmd_buffer->meta.blit.dspool;
4272          result = v3dv_AllocateDescriptorSets(_device, &info, set);
4273       }
4274    }
4275 
4276    return result;
4277 }
4278 
4279 /**
4280  * Returns true if the implementation supports the requested operation (even if
4281  * it failed to process it, for example, due to an out-of-memory error).
4282  *
4283  * The caller can specify the channels on the destination to be written via the
4284  * cmask parameter (which can be 0 to default to all channels), as well as a
4285  * swizzle to apply to the source via the cswizzle parameter (which can be NULL
4286  * to use the default identity swizzle).
4287  *
4288  * Supports multi-plane formats too.
4289  */
4290 static bool
4291 blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
4292             struct v3dv_image *dst,
4293             VkFormat dst_format,
4294             struct v3dv_image *src,
4295             VkFormat src_format,
4296             VkColorComponentFlags cmask,
4297             VkComponentMapping *cswizzle,
4298             const VkImageBlit2 *region,
4299             VkFilter filter,
4300             bool dst_is_padded_image)
4301 {
4302    bool handled = true;
4303    VkResult result;
4304 
4305    /* We don't support rendering to linear depth/stencil, this should have
4306     * been rewritten to a compatible color blit by the caller.
4307     */
4308    assert(dst->tiled || !vk_format_is_depth_or_stencil(dst_format));
4309 
4310    /* Can't sample from linear images */
4311    if (!src->tiled && src->vk.image_type != VK_IMAGE_TYPE_1D) {
4312       return false;
4313    }
4314 
4315    /* Rewrite combined D/S blits to compatible color blits */
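   /* For the packed 24-bit depth formats this reinterprets the 32-bit texel
    * as VK_FORMAT_R8G8B8A8_UINT; judging from the masks below, the stencil
    * byte aliases the R channel and the depth bits alias G/B/A, so each
    * aspect can be selected with a color write mask.
    */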
4316    if (vk_format_is_depth_or_stencil(dst_format)) {
4317       assert(src_format == dst_format);
4318       assert(cmask == 0);
4319       switch(dst_format) {
4320       case VK_FORMAT_D16_UNORM:
4321          dst_format = VK_FORMAT_R16_UINT;
4322          break;
4323       case VK_FORMAT_D32_SFLOAT:
4324          dst_format = VK_FORMAT_R32_UINT;
4325          break;
4326       case VK_FORMAT_X8_D24_UNORM_PACK32:
4327       case VK_FORMAT_D24_UNORM_S8_UINT:
4328          if (region->srcSubresource.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
4329             cmask |= VK_COLOR_COMPONENT_G_BIT |
4330                      VK_COLOR_COMPONENT_B_BIT |
4331                      VK_COLOR_COMPONENT_A_BIT;
4332          }
4333          if (region->srcSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
4334             assert(dst_format == VK_FORMAT_D24_UNORM_S8_UINT);
4335             cmask |= VK_COLOR_COMPONENT_R_BIT;
4336          }
4337          dst_format = VK_FORMAT_R8G8B8A8_UINT;
4338          break;
4339       default:
4340          unreachable("Unsupported depth/stencil format");
4341       };
4342       src_format = dst_format;
4343    }
4344 
4345    uint8_t src_plane =
4346       v3dv_plane_from_aspect(region->srcSubresource.aspectMask);
4347    assert(src_plane < src->plane_count);
4348    uint8_t dst_plane =
4349       v3dv_plane_from_aspect(region->dstSubresource.aspectMask);
4350    assert(dst_plane < dst->plane_count);
4351 
4352    const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
4353                                             VK_COLOR_COMPONENT_G_BIT |
4354                                             VK_COLOR_COMPONENT_B_BIT |
4355                                             VK_COLOR_COMPONENT_A_BIT;
4356    if (cmask == 0)
4357       cmask = full_cmask;
4358 
4359    VkComponentMapping ident_swizzle = {
4360       .r = VK_COMPONENT_SWIZZLE_IDENTITY,
4361       .g = VK_COMPONENT_SWIZZLE_IDENTITY,
4362       .b = VK_COMPONENT_SWIZZLE_IDENTITY,
4363       .a = VK_COMPONENT_SWIZZLE_IDENTITY,
4364    };
4365    if (!cswizzle)
4366       cswizzle = &ident_swizzle;
4367 
4368    /* When we get here from a copy between compressed / uncompressed images
4369     * we choose to specify the destination blit region based on the size
4370     * semantics of the source image of the copy (see copy_image_blit), so we
4371     * need to apply those same semantics here when we compute the size of the
4372     * destination image level.
4373     */
4374    const uint32_t dst_block_w =
4375       vk_format_get_blockwidth(dst->planes[dst_plane].vk_format);
4376    const uint32_t dst_block_h =
4377       vk_format_get_blockheight(dst->planes[dst_plane].vk_format);
4378    const uint32_t src_block_w =
4379       vk_format_get_blockwidth(src->planes[src_plane].vk_format);
4380    const uint32_t src_block_h =
4381       vk_format_get_blockheight(src->planes[src_plane].vk_format);
4382    const uint32_t dst_level_w =
4383       u_minify(DIV_ROUND_UP(dst->vk.extent.width * src_block_w, dst_block_w),
4384                region->dstSubresource.mipLevel);
4385    const uint32_t dst_level_h =
4386       u_minify(DIV_ROUND_UP(dst->vk.extent.height * src_block_h, dst_block_h),
4387                region->dstSubresource.mipLevel);
4388 
4389    const uint32_t src_level_w =
4390       u_minify(src->planes[src_plane].width, region->srcSubresource.mipLevel);
4391    const uint32_t src_level_h =
4392       u_minify(src->planes[src_plane].height, region->srcSubresource.mipLevel);
4393 
4394    assert(src->plane_count == 1 || src->vk.image_type != VK_IMAGE_TYPE_3D);
4395    const uint32_t src_level_d =
4396       u_minify(src->vk.extent.depth, region->srcSubresource.mipLevel);
4397 
4398    uint32_t dst_x, dst_y, dst_w, dst_h;
4399    bool dst_mirror_x, dst_mirror_y;
4400    compute_blit_box(region->dstOffsets,
4401                     dst_level_w, dst_level_h,
4402                     &dst_x, &dst_y, &dst_w, &dst_h,
4403                     &dst_mirror_x, &dst_mirror_y);
4404 
4405    uint32_t src_x, src_y, src_w, src_h;
4406    bool src_mirror_x, src_mirror_y;
4407    compute_blit_box(region->srcOffsets,
4408                     src_level_w, src_level_h,
4409                     &src_x, &src_y, &src_w, &src_h,
4410                     &src_mirror_x, &src_mirror_y);
4411 
4412    uint32_t min_dst_layer;
4413    uint32_t max_dst_layer;
4414    bool dst_mirror_z = false;
4415    if (dst->vk.image_type != VK_IMAGE_TYPE_3D) {
4416       min_dst_layer = region->dstSubresource.baseArrayLayer;
4417       max_dst_layer = min_dst_layer + region->dstSubresource.layerCount;
4418    } else {
4419       compute_blit_3d_layers(region->dstOffsets,
4420                              &min_dst_layer, &max_dst_layer,
4421                              &dst_mirror_z);
4422    }
4423 
4424    uint32_t min_src_layer;
4425    uint32_t max_src_layer;
4426    bool src_mirror_z = false;
4427    if (src->vk.image_type != VK_IMAGE_TYPE_3D) {
4428       min_src_layer = region->srcSubresource.baseArrayLayer;
4429       max_src_layer = min_src_layer + region->srcSubresource.layerCount;
4430    } else {
4431       compute_blit_3d_layers(region->srcOffsets,
4432                              &min_src_layer, &max_src_layer,
4433                              &src_mirror_z);
4434    }
4435 
4436    uint32_t layer_count = max_dst_layer - min_dst_layer;
4437 
4438    /* Translate source blit coordinates to normalized texture coordinates for
4439     * single sampled textures. For multisampled textures we require
4440     * unnormalized coordinates, since we can only do texelFetch on them.
4441     */
4442    float coords[4] =  {
4443       (float)src_x,
4444       (float)src_y,
4445       (float)(src_x + src_w),
4446       (float)(src_y + src_h),
4447    };
4448 
4449    if (src->vk.samples == VK_SAMPLE_COUNT_1_BIT) {
4450       coords[0] /= (float)src_level_w;
4451       coords[1] /= (float)src_level_h;
4452       coords[2] /= (float)src_level_w;
4453       coords[3] /= (float)src_level_h;
4454    }
4455 
4456    /* Handle mirroring */
4457    const bool mirror_x = dst_mirror_x != src_mirror_x;
4458    const bool mirror_y = dst_mirror_y != src_mirror_y;
4459    const bool mirror_z = dst_mirror_z != src_mirror_z;
4460    float tex_coords[5] = {
4461       !mirror_x ? coords[0] : coords[2],
4462       !mirror_y ? coords[1] : coords[3],
4463       !mirror_x ? coords[2] : coords[0],
4464       !mirror_y ? coords[3] : coords[1],
4465       /* Z coordinate for 3D blit sources, to be filled for each
4466        * destination layer
4467        */
4468       0.0f
4469    };
4470 
4471    /* For blits from 3D images we also need to compute the slice coordinate to
4472     * sample from, which will change for each layer in the destination.
4473     * Compute the step we should increase for each iteration.
4474     */
4475    const float src_z_step =
4476       (float)(max_src_layer - min_src_layer) / (float)layer_count;
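   /* For example (hypothetical sizes): blitting an 8-slice 3D source into 4
    * destination layers gives src_z_step = 2, so layer i samples at
    * z = (min_src_layer + (i + 0.5) * 2) / src_level_d, i.e. the middle of
    * each 2-slice step.
    */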
4477 
4478    /* Get the blit pipeline */
4479    struct v3dv_meta_blit_pipeline *pipeline = NULL;
4480    bool ok = get_blit_pipeline(cmd_buffer->device,
4481                                dst_format, src_format, cmask, src->vk.image_type,
4482                                dst->vk.samples, src->vk.samples,
4483                                &pipeline);
4484    if (!ok)
4485       return handled;
4486    assert(pipeline && pipeline->pipeline &&
4487           pipeline->pass && pipeline->pass_no_load);
4488 
4489    struct v3dv_device *device = cmd_buffer->device;
4490    assert(device->meta.blit.ds_layout);
4491 
4492    VkDevice _device = v3dv_device_to_handle(device);
4493    VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
4494 
4495    /* Create sampler for blit source image */
4496    VkSamplerCreateInfo sampler_info = {
4497       .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
4498       .magFilter = filter,
4499       .minFilter = filter,
4500       .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
4501       .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
4502       .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
4503       .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
4504    };
4505    VkSampler sampler;
4506    result = v3dv_CreateSampler(_device, &sampler_info, &device->vk.alloc,
4507                                &sampler);
4508    if (result != VK_SUCCESS)
4509       goto fail;
4510 
4511    v3dv_cmd_buffer_add_private_obj(
4512       cmd_buffer, (uintptr_t)sampler,
4513       (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroySampler);
4514 
4515    /* Push command buffer state before starting meta operation */
4516    v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
4517 
4518    /* Push state that is common for all layers */
4519    v3dv_CmdBindPipeline(_cmd_buffer,
4520                         VK_PIPELINE_BIND_POINT_GRAPHICS,
4521                         pipeline->pipeline);
4522 
4523    const VkViewport viewport = {
4524       .x = dst_x,
4525       .y = dst_y,
4526       .width = dst_w,
4527       .height = dst_h,
4528       .minDepth = 0.0f,
4529       .maxDepth = 1.0f
4530    };
4531    v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);
4532 
4533    const VkRect2D scissor = {
4534       .offset = { dst_x, dst_y },
4535       .extent = { dst_w, dst_h }
4536    };
4537    v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);
4538 
4539    bool can_skip_tlb_load = false;
4540    const VkRect2D render_area = {
4541       .offset = { dst_x, dst_y },
4542       .extent = { dst_w, dst_h },
4543    };
4544 
4545    /* Record per-layer commands */
4546    for (uint32_t i = 0; i < layer_count; i++) {
4547       /* Setup framebuffer */
4548       VkImageViewCreateInfo dst_image_view_info = {
4549          .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
4550          .image = v3dv_image_to_handle(dst),
4551          .viewType = v3dv_image_type_to_view_type(dst->vk.image_type),
4552          .format = dst_format,
4553          .subresourceRange = {
4554             .aspectMask = region->dstSubresource.aspectMask,
4555             .baseMipLevel = region->dstSubresource.mipLevel,
4556             .levelCount = 1,
4557             .baseArrayLayer = min_dst_layer + i,
4558             .layerCount = 1
4559          },
4560       };
4561       VkImageView dst_image_view;
4562       result = v3dv_create_image_view(device, &dst_image_view_info,
4563                                       &dst_image_view);
4564       if (result != VK_SUCCESS)
4565          goto fail;
4566 
4567       v3dv_cmd_buffer_add_private_obj(
4568          cmd_buffer, (uintptr_t)dst_image_view,
4569          (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
4570 
4571       VkFramebufferCreateInfo fb_info = {
4572          .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
4573          .renderPass = pipeline->pass,
4574          .attachmentCount = 1,
4575          .pAttachments = &dst_image_view,
4576          .width = dst_x + dst_w,
4577          .height = dst_y + dst_h,
4578          .layers = 1,
4579       };
4580 
4581       VkFramebuffer fb;
4582       result = v3dv_CreateFramebuffer(_device, &fb_info,
4583                                       &cmd_buffer->device->vk.alloc, &fb);
4584       if (result != VK_SUCCESS)
4585          goto fail;
4586 
4587       struct v3dv_framebuffer *framebuffer = v3dv_framebuffer_from_handle(fb);
4588       framebuffer->has_edge_padding = fb_info.width == dst_level_w &&
4589                                       fb_info.height == dst_level_h &&
4590                                       dst_is_padded_image;
4591 
4592       v3dv_cmd_buffer_add_private_obj(
4593          cmd_buffer, (uintptr_t)fb,
4594          (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
4595 
4596       /* Setup descriptor set for blit source texture. We don't have to
4597        * register the descriptor as a private command buffer object since
4598        * all descriptors will be freed automatically with the descriptor
4599        * pool.
4600        */
4601       VkDescriptorSet set;
4602       result = allocate_blit_source_descriptor_set(cmd_buffer, &set);
4603       if (result != VK_SUCCESS)
4604          goto fail;
4605 
4606       VkImageViewCreateInfo src_image_view_info = {
4607          .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
4608          .image = v3dv_image_to_handle(src),
4609          .viewType = v3dv_image_type_to_view_type(src->vk.image_type),
4610          .format = src_format,
4611          .components = *cswizzle,
4612          .subresourceRange = {
4613             .aspectMask = region->srcSubresource.aspectMask,
4614             .baseMipLevel = region->srcSubresource.mipLevel,
4615             .levelCount = 1,
4616             .baseArrayLayer =
4617                src->vk.image_type == VK_IMAGE_TYPE_3D ? 0 : min_src_layer + i,
4618             .layerCount = 1
4619          },
4620       };
4621       VkImageView src_image_view;
4622       result = v3dv_create_image_view(device, &src_image_view_info,
4623                                       &src_image_view);
4624       if (result != VK_SUCCESS)
4625          goto fail;
4626 
4627       v3dv_cmd_buffer_add_private_obj(
4628          cmd_buffer, (uintptr_t)src_image_view,
4629          (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
4630 
4631       VkDescriptorImageInfo image_info = {
4632          .sampler = sampler,
4633          .imageView = src_image_view,
4634          .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
4635       };
4636       VkWriteDescriptorSet write = {
4637          .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
4638          .dstSet = set,
4639          .dstBinding = 0,
4640          .dstArrayElement = 0,
4641          .descriptorCount = 1,
4642          .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
4643          .pImageInfo = &image_info,
4644       };
4645       v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);
4646 
4647       v3dv_CmdBindDescriptorSets(_cmd_buffer,
4648                                  VK_PIPELINE_BIND_POINT_GRAPHICS,
4649                                  device->meta.blit.p_layout,
4650                                  0, 1, &set,
4651                                  0, NULL);
4652 
4653       /* If the region we are about to blit is tile-aligned, then we can
4654        * use the render pass version that won't pre-load the tile buffer
4655        * with the dst image contents before the blit. The exception is when we
4656        * don't have a full color mask, since in that case we need to preserve
4657        * the original value of some of the color components.
4658        *
4659        * Since all layers have the same area, we only need to compute this for
4660        * the first.
4661        */
4662       if (i == 0) {
4663          struct v3dv_render_pass *pipeline_pass =
4664             v3dv_render_pass_from_handle(pipeline->pass);
4665          can_skip_tlb_load =
4666             cmask == full_cmask &&
4667             v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,
4668                                               framebuffer, pipeline_pass, 0);
4669       }
4670 
4671       /* Record blit */
4672       VkRenderPassBeginInfo rp_info = {
4673          .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
4674          .renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
4675                                            pipeline->pass,
4676          .framebuffer = fb,
4677          .renderArea = render_area,
4678          .clearValueCount = 0,
4679       };
4680 
4681       VkSubpassBeginInfo sp_info = {
4682          .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
4683          .contents = VK_SUBPASS_CONTENTS_INLINE,
4684       };
4685 
4686       v3dv_CmdBeginRenderPass2(_cmd_buffer, &rp_info, &sp_info);
4687       struct v3dv_job *job = cmd_buffer->state.job;
4688       if (!job)
4689          goto fail;
4690 
4691       /* For 3D blits we need to compute the source slice to blit from (the Z
4692        * coordinate of the source sample operation). We want to choose this
4693        * based on the ratio of the depth of the source and the destination
4694        * images, picking the coordinate in the middle of each step.
4695        */
4696       if (src->vk.image_type == VK_IMAGE_TYPE_3D) {
4697          tex_coords[4] =
4698             !mirror_z ?
4699             (min_src_layer + (i + 0.5f) * src_z_step) / (float)src_level_d :
4700             (max_src_layer - (i + 0.5f) * src_z_step) / (float)src_level_d;
4701       }
4702 
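      /* The 20 bytes of push constants are the 5 floats in tex_coords: the
       * source x0/y0/x1/y1 texture coordinates plus the Z slice coordinate.
       */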
4703       v3dv_CmdPushConstants(_cmd_buffer,
4704                             device->meta.blit.p_layout,
4705                             VK_SHADER_STAGE_VERTEX_BIT, 0, 20,
4706                             &tex_coords);
4707 
4708       v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);
4709 
4710       VkSubpassEndInfo sp_end_info = {
4711          .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
4712       };
4713 
4714       v3dv_CmdEndRenderPass2(_cmd_buffer, &sp_end_info);
4715    }
4716 
4717 fail:
4718    v3dv_cmd_buffer_meta_state_pop(cmd_buffer, true);
4719 
4720    return handled;
4721 }
4722 
4723 VKAPI_ATTR void VKAPI_CALL
4724 v3dv_CmdBlitImage2(VkCommandBuffer commandBuffer,
4725                    const VkBlitImageInfo2 *pBlitImageInfo)
4726 {
4727    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
4728    V3DV_FROM_HANDLE(v3dv_image, src, pBlitImageInfo->srcImage);
4729    V3DV_FROM_HANDLE(v3dv_image, dst, pBlitImageInfo->dstImage);
4730 
4731    /* From vkCmdBlitImage:
4732     *   "srcImage must not use a format that requires a sampler YCBCR
4733     *    conversion"
4734     *   "dstImage must not use a format that requires a sampler YCBCR
4735     *    conversion"
4736     */
4737    assert(src->plane_count == 1);
4738    assert(dst->plane_count == 1);
4739 
4740    /* This command can only happen outside a render pass */
4741    assert(cmd_buffer->state.pass == NULL);
4742    assert(cmd_buffer->state.job == NULL);
4743 
4744    /* From the Vulkan 1.0 spec, vkCmdBlitImage valid usage */
4745    assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT &&
4746           src->vk.samples == VK_SAMPLE_COUNT_1_BIT);
4747 
4748    /* We don't export VK_FORMAT_FEATURE_BLIT_DST_BIT on compressed formats */
4749    assert(!vk_format_is_compressed(dst->vk.format));
4750 
4751    cmd_buffer->state.is_transfer = true;
4752 
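   /* For each region, try the fixed-function TFU path first and fall back to
    * the shader-based blit; anything neither path handles hits the
    * unreachable below.
    */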
4753    for (uint32_t i = 0; i < pBlitImageInfo->regionCount; i++) {
4754       const VkImageBlit2 *region = &pBlitImageInfo->pRegions[i];
4755 
4756       if (blit_tfu(cmd_buffer, dst, src, region))
4757          continue;
4758       if (blit_shader(cmd_buffer,
4759                       dst, dst->vk.format,
4760                       src, src->vk.format,
4761                       0, NULL,
4762                       region,
4763                       pBlitImageInfo->filter, true)) {
4764          continue;
4765       }
4766       unreachable("Unsupported blit operation");
4767    }
4768 
4769    cmd_buffer->state.is_transfer = false;
4770 }
4771 
4772 static bool
4773 resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
4774                   struct v3dv_image *dst,
4775                   struct v3dv_image *src,
4776                   const VkImageResolve2 *region)
4777 {
4778    /* No resolve for multi-planar images. Using plane 0 */
4779    assert(dst->plane_count == 1);
4780    assert(src->plane_count == 1);
4781 
4782    if (!v3dv_meta_can_use_tlb(src, 0, region->srcSubresource.mipLevel,
4783                               &region->srcOffset, NULL, NULL) ||
4784        !v3dv_meta_can_use_tlb(dst, 0, region->dstSubresource.mipLevel,
4785                               &region->dstOffset, &region->extent, NULL)) {
4786       return false;
4787    }
4788 
4789    if (!v3dv_X(cmd_buffer->device, format_supports_tlb_resolve)(src->format))
4790       return false;
4791 
4792    const VkFormat fb_format = src->vk.format;
4793 
4794    uint32_t num_layers;
4795    if (dst->vk.image_type != VK_IMAGE_TYPE_3D)
4796       num_layers = region->dstSubresource.layerCount;
4797    else
4798       num_layers = region->extent.depth;
4799    assert(num_layers > 0);
4800 
4801    struct v3dv_job *job =
4802       v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
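   /* Following the same "handled" convention as blit_shader(), we still
    * return true if the job cannot be allocated: the operation is supported,
    * it just could not be processed (e.g. out of memory).
    */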
4803    if (!job)
4804       return true;
4805 
4806    const uint32_t block_w =
4807       vk_format_get_blockwidth(dst->planes[0].vk_format);
4808    const uint32_t block_h =
4809       vk_format_get_blockheight(dst->planes[0].vk_format);
4810    const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
4811    const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
4812 
4813    uint32_t internal_type, internal_bpp;
4814    v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
4815       (fb_format, region->srcSubresource.aspectMask,
4816        &internal_type, &internal_bpp);
4817 
4818    v3dv_job_start_frame(job, width, height, num_layers, false, true, 1,
4819                         internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
4820                         true);
4821 
4822    struct v3dv_meta_framebuffer framebuffer;
4823    v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
4824                                               internal_type, &job->frame_tiling);
4825 
4826    v3dv_X(job->device, job_emit_binning_flush)(job);
4827    v3dv_X(job->device, meta_emit_resolve_image_rcl)(job, dst, src,
4828                                                     &framebuffer, region);
4829 
4830    v3dv_cmd_buffer_finish_job(cmd_buffer);
4831    return true;
4832 }
4833 
4834 static bool
4835 resolve_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
4836                    struct v3dv_image *dst,
4837                    struct v3dv_image *src,
4838                    const VkImageResolve2 *region)
4839 {
4840    const VkImageBlit2 blit_region = {
4841       .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
4842       .srcSubresource = region->srcSubresource,
4843       .srcOffsets = {
4844          region->srcOffset,
4845          {
4846             region->srcOffset.x + region->extent.width,
4847             region->srcOffset.y + region->extent.height,
4848          }
4849       },
4850       .dstSubresource = region->dstSubresource,
4851       .dstOffsets = {
4852          region->dstOffset,
4853          {
4854             region->dstOffset.x + region->extent.width,
4855             region->dstOffset.y + region->extent.height,
4856          }
4857       },
4858    };
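   /* A resolve is expressed as a same-size blit; since the source is
    * multisampled and the destination is single-sampled, the blit fragment
    * shader is expected to perform the per-sample resolve.
    */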
4859    return blit_shader(cmd_buffer,
4860                       dst, dst->vk.format,
4861                       src, src->vk.format,
4862                       0, NULL,
4863                       &blit_region, VK_FILTER_NEAREST, true);
4864 }
4865 
4866 VKAPI_ATTR void VKAPI_CALL
4867 v3dv_CmdResolveImage2(VkCommandBuffer commandBuffer,
4868                       const VkResolveImageInfo2 *info)
4869 
4870 {
4871    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
4872    V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
4873    V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);
4874 
4875    /* This command can only happen outside a render pass */
4876    assert(cmd_buffer->state.pass == NULL);
4877    assert(cmd_buffer->state.job == NULL);
4878 
4879    assert(src->vk.samples == VK_SAMPLE_COUNT_4_BIT);
4880    assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT);
4881 
4882    /* We don't support multi-sampled multi-plane images */
4883    assert(src->plane_count == 1);
4884    assert(dst->plane_count == 1);
4885 
4886    cmd_buffer->state.is_transfer = true;
4887 
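   /* Prefer the TLB resolve path; fall back to a shader blit when the TLB
    * path cannot handle the region.
    */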
4888    for (uint32_t i = 0; i < info->regionCount; i++) {
4889       if (resolve_image_tlb(cmd_buffer, dst, src, &info->pRegions[i]))
4890          continue;
4891       if (resolve_image_blit(cmd_buffer, dst, src, &info->pRegions[i]))
4892          continue;
4893       unreachable("Unsupported multisample resolve operation");
4894    }
4895 
4896    cmd_buffer->state.is_transfer = false;
4897 }
4898